From dbb18e21a9d3560a2232bfe9379abc1c767cd183 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 11 Feb 2026 20:48:06 +0100 Subject: [PATCH 01/24] Enhance prefiltering and PBS staged restore Refine backup prefiltering: track detailed stats, skip symlinks, and avoid normalizing known structured config paths (etc/proxmox-backup, etc/pve, etc/ssh, etc/pam.d, etc/systemd/system). Normalize helpers now return a (changed,bool) flag; config normalization is limited to safe text normalization (no sorting). Tests updated and a new structured-config test added. In orchestrator, add recovery for malformed/flattened proxmox-backup datastore.cfg by detecting duplicate keys and attempting to restore from pbs_datastore_inventory.json (including a lightweight inventory parser and duplicate-key detector). Update tests to cover recovery behavior. --- internal/backup/optimizations.go | 104 ++++++++++------ internal/backup/optimizations_helpers_test.go | 28 +++-- .../backup/optimizations_structured_test.go | 77 ++++++++++++ internal/backup/optimizations_test.go | 7 +- internal/orchestrator/pbs_staged_apply.go | 111 ++++++++++++++++++ .../orchestrator/pbs_staged_apply_test.go | 69 +++++++++++ 6 files changed, 349 insertions(+), 47 deletions(-) create mode 100644 internal/backup/optimizations_structured_test.go diff --git a/internal/backup/optimizations.go b/internal/backup/optimizations.go index c4e8892..f31943f 100644 --- a/internal/backup/optimizations.go +++ b/internal/backup/optimizations.go @@ -9,7 +9,6 @@ import ( "io" "os" "path/filepath" - "sort" "strings" "github.com/tis24dev/proxsave/internal/logging" @@ -310,7 +309,37 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma } logger.Debug("Prefiltering files under %s (max size %d bytes)", root, maxSize) - var processed int + type prefilterStats struct { + scanned int + optimized int + skippedStructured int + skippedSymlink int + } + var stats prefilterStats + + isStructuredConfigPath := func(path string) bool { + rel, err := filepath.Rel(root, path) + if err != nil { + return false + } + rel = filepath.ToSlash(filepath.Clean(rel)) + rel = strings.TrimPrefix(rel, "./") + switch { + case strings.HasPrefix(rel, "etc/proxmox-backup/"): + return true + case strings.HasPrefix(rel, "etc/pve/"): + return true + case strings.HasPrefix(rel, "etc/ssh/"): + return true + case strings.HasPrefix(rel, "etc/pam.d/"): + return true + case strings.HasPrefix(rel, "etc/systemd/system/"): + return true + default: + return false + } + } + err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { if err != nil { return err @@ -322,27 +351,39 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma return nil } - info, err := d.Info() - if err != nil { + info, err := os.Lstat(path) + if err != nil || info == nil { + return nil + } + if info.Mode()&os.ModeSymlink != 0 { + stats.skippedSymlink++ + return nil + } + if !info.Mode().IsRegular() { return nil } if info.Size() == 0 || info.Size() > maxSize { return nil } + stats.scanned++ ext := strings.ToLower(filepath.Ext(path)) switch ext { case ".txt", ".log", ".md": - if err := normalizeTextFile(path); err == nil { - processed++ + if changed, err := normalizeTextFile(path); err == nil && changed { + stats.optimized++ } case ".conf", ".cfg", ".ini": - if err := normalizeConfigFile(path); err == nil { - processed++ + if isStructuredConfigPath(path) { + stats.skippedStructured++ + return nil + } + if changed, err := normalizeConfigFile(path); err == 
nil && changed { + stats.optimized++ } case ".json": - if err := minifyJSON(path); err == nil { - processed++ + if changed, err := minifyJSON(path); err == nil && changed { + stats.optimized++ } } return nil @@ -352,52 +393,43 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma return fmt.Errorf("prefilter walk failed: %w", err) } - logger.Info("Prefilter completed: %d files optimized", processed) + logger.Info("Prefilter completed: optimized=%d scanned=%d skipped_structured=%d skipped_symlink=%d", stats.optimized, stats.scanned, stats.skippedStructured, stats.skippedSymlink) return nil } -func normalizeTextFile(path string) error { +func normalizeTextFile(path string) (bool, error) { data, err := os.ReadFile(path) if err != nil { - return err + return false, err } normalized := bytes.ReplaceAll(data, []byte("\r"), nil) if bytes.Equal(data, normalized) { - return nil + return false, nil } - return os.WriteFile(path, normalized, defaultChunkFilePerm) + return true, os.WriteFile(path, normalized, defaultChunkFilePerm) } -func normalizeConfigFile(path string) error { - data, err := os.ReadFile(path) - if err != nil { - return err - } - lines := strings.Split(string(data), "\n") - filtered := lines[:0] - for _, line := range lines { - line = strings.TrimSpace(line) - if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, ";") { - continue - } - filtered = append(filtered, line) - } - sort.Strings(filtered) - return os.WriteFile(path, []byte(strings.Join(filtered, "\n")), defaultChunkFilePerm) +func normalizeConfigFile(path string) (bool, error) { + // Config files can be whitespace/ordering-sensitive (e.g. section headers). + // Only perform safe, semantic-preserving normalization here. + return normalizeTextFile(path) } -func minifyJSON(path string) error { +func minifyJSON(path string) (bool, error) { data, err := os.ReadFile(path) if err != nil { - return err + return false, err } var tmp any if err := json.Unmarshal(data, &tmp); err != nil { - return err + return false, err } minified, err := json.Marshal(tmp) if err != nil { - return err + return false, err + } + if bytes.Equal(bytes.TrimSpace(data), minified) { + return false, nil } - return os.WriteFile(path, minified, defaultChunkFilePerm) + return true, os.WriteFile(path, minified, defaultChunkFilePerm) } diff --git a/internal/backup/optimizations_helpers_test.go b/internal/backup/optimizations_helpers_test.go index 2c8032e..7c6fffa 100644 --- a/internal/backup/optimizations_helpers_test.go +++ b/internal/backup/optimizations_helpers_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" ) @@ -46,8 +47,10 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { if err := os.WriteFile(textPath, []byte("line1\r\nline2\r\n"), 0o640); err != nil { t.Fatalf("write text: %v", err) } - if err := normalizeTextFile(textPath); err != nil { + if changed, err := normalizeTextFile(textPath); err != nil { t.Fatalf("normalizeTextFile: %v", err) + } else if !changed { + t.Fatalf("expected text to be normalized") } data, _ := os.ReadFile(textPath) if bytes.Contains(data, []byte("\r")) { @@ -55,24 +58,31 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { } cfgPath := filepath.Join(tmp, "app.conf") - cfgContent := "#comment\nz=1\n\n;ignored\na=2\n" + cfgContent := "#comment\r\nz=1\r\n\r\n;ignored\r\na=2\r\n" if err := os.WriteFile(cfgPath, []byte(cfgContent), 0o640); err != nil { t.Fatalf("write conf: %v", err) } - if err := 
normalizeConfigFile(cfgPath); err != nil { + if changed, err := normalizeConfigFile(cfgPath); err != nil { t.Fatalf("normalizeConfigFile: %v", err) + } else if !changed { + t.Fatalf("expected config to be normalized") } cfgData, _ := os.ReadFile(cfgPath) - if string(cfgData) != "a=2\nz=1" { - t.Fatalf("config not normalized/sorted, got %q", cfgData) + if bytes.Contains(cfgData, []byte("\r")) { + t.Fatalf("expected CR removed from config, got %q", cfgData) + } + if string(cfgData) != strings.ReplaceAll(cfgContent, "\r", "") { + t.Fatalf("config contents changed unexpectedly, got %q", cfgData) } jsonPath := filepath.Join(tmp, "data.json") if err := os.WriteFile(jsonPath, []byte("{\n \"a\": 1,\n \"b\": 2\n}\n"), 0o640); err != nil { t.Fatalf("write json: %v", err) } - if err := minifyJSON(jsonPath); err != nil { + if changed, err := minifyJSON(jsonPath); err != nil { t.Fatalf("minifyJSON: %v", err) + } else if !changed { + t.Fatalf("expected JSON to be minified") } jdata, _ := os.ReadFile(jsonPath) if bytes.Contains(jdata, []byte(" ")) || bytes.Contains(jdata, []byte("\n")) { @@ -82,7 +92,7 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { if err := os.WriteFile(jsonPath, []byte("{invalid"), 0o640); err != nil { t.Fatalf("write invalid json: %v", err) } - if err := minifyJSON(jsonPath); err == nil { + if _, err := minifyJSON(jsonPath); err == nil { t.Fatalf("expected error for invalid json") } } @@ -95,8 +105,10 @@ func TestMinifyJSONKeepsData(t *testing.T) { if err := os.WriteFile(path, payload, 0o640); err != nil { t.Fatalf("write json: %v", err) } - if err := minifyJSON(path); err != nil { + if changed, err := minifyJSON(path); err != nil { t.Fatalf("minifyJSON: %v", err) + } else if !changed { + t.Fatalf("expected JSON to be minified") } roundTrip, _ := os.ReadFile(path) var decoded map[string]int diff --git a/internal/backup/optimizations_structured_test.go b/internal/backup/optimizations_structured_test.go new file mode 100644 index 0000000..1a46a16 --- /dev/null +++ b/internal/backup/optimizations_structured_test.go @@ -0,0 +1,77 @@ +package backup + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestPrefilterSkipsStructuredConfigs(t *testing.T) { + tmp := t.TempDir() + + // Create structured config (should be skipped) + pbsDir := filepath.Join(tmp, "etc", "proxmox-backup") + if err := os.MkdirAll(pbsDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + + pbsCfg := filepath.Join(pbsDir, "datastore.cfg") + pbsContent := "datastore: Test\n\tpath /mnt/test\n\tcomment Test DS\n" + if err := os.WriteFile(pbsCfg, []byte(pbsContent), 0o640); err != nil { + t.Fatalf("write pbs config: %v", err) + } + + // Create normal config with CRLF (should be normalized) + normalCfg := filepath.Join(tmp, "etc", "normal.cfg") + normalContent := "option1\r\noption2\r\n" + if err := os.WriteFile(normalCfg, []byte(normalContent), 0o640); err != nil { + t.Fatalf("write normal config: %v", err) + } + + // Create log file with CRLF (should be normalized) + logDir := filepath.Join(tmp, "var", "log") + if err := os.MkdirAll(logDir, 0o755); err != nil { + t.Fatalf("mkdir log: %v", err) + } + logFile := filepath.Join(logDir, "test.log") + logContent := "line1\r\nline2\r\n" + if err := os.WriteFile(logFile, []byte(logContent), 0o640); err != nil { + t.Fatalf("write log: %v", err) + } + + // Run prefilter + logger := logging.New(types.LogLevelError, false) + if err := 
prefilterFiles(context.Background(), logger, tmp, 8*1024*1024); err != nil { + t.Fatalf("prefilterFiles: %v", err) + } + + // Verify PBS config unchanged (TABs preserved) + pbsAfter, _ := os.ReadFile(pbsCfg) + if string(pbsAfter) != pbsContent { + t.Fatalf("PBS config was modified!\nExpected: %q\nGot: %q", pbsContent, string(pbsAfter)) + } + if !strings.Contains(string(pbsAfter), "\t") { + t.Fatalf("PBS config lost TAB indentation") + } + + // Verify normal config normalized (CRLF removed) + normalAfter, _ := os.ReadFile(normalCfg) + if strings.Contains(string(normalAfter), "\r") { + t.Fatalf("Normal config still has CRLF: %q", normalAfter) + } + expectedNormal := strings.ReplaceAll(normalContent, "\r", "") + if string(normalAfter) != expectedNormal { + t.Fatalf("Normal config not normalized correctly\nExpected: %q\nGot: %q", expectedNormal, string(normalAfter)) + } + + // Verify log normalized (CRLF removed) + logAfter, _ := os.ReadFile(logFile) + if strings.Contains(string(logAfter), "\r") { + t.Fatalf("Log file still has CRLF: %q", logAfter) + } +} diff --git a/internal/backup/optimizations_test.go b/internal/backup/optimizations_test.go index b3ae733..1cd7e0c 100644 --- a/internal/backup/optimizations_test.go +++ b/internal/backup/optimizations_test.go @@ -40,7 +40,8 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { dupB := mustWriteFile(filepath.Join("dup", "two.txt"), "identical data") logFile := mustWriteFile(filepath.Join("logs", "app.log"), "line one\r\nline two\r\n") - confFile := mustWriteFile(filepath.Join("conf", "settings.conf"), "# comment\nkey=value\n\n;ignored\nalpha=beta\n") + confOriginal := "# comment\nkey=value\n\n;ignored\nalpha=beta\n" + confFile := mustWriteFile(filepath.Join("conf", "settings.conf"), confOriginal) jsonFile := mustWriteFile(filepath.Join("meta", "data.json"), "{\n \"a\": 1,\n \"b\": 2\n}\n") chunkTarget := mustWriteFile("chunk.bin", string(bytes.Repeat([]byte("x"), 96))) @@ -75,7 +76,7 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { t.Fatalf("symlink data mismatch, got %q", data) } - // Prefilter should strip CR characters and comments/sort config files. + // Prefilter should strip CR characters and keep config files semantically intact. 
logContents, err := os.ReadFile(logFile) if err != nil { t.Fatalf("read log file: %v", err) @@ -87,7 +88,7 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { if err != nil { t.Fatalf("read config file: %v", err) } - if string(confContents) != "alpha=beta\nkey=value" { + if string(confContents) != confOriginal { t.Fatalf("unexpected config contents: %q", confContents) } jsonContents, err := os.ReadFile(jsonFile) diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index 3111daa..e434cb7 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -2,6 +2,7 @@ package orchestrator import ( "context" + "encoding/json" "errors" "fmt" "os" @@ -170,6 +171,32 @@ func applyPBSDatastoreCfgFromStage(ctx context.Context, logger *logging.Logger, return nil } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + logger.Warning("PBS staged apply: staged datastore.cfg looks invalid (%s); attempting recovery from pbs_datastore_inventory.json", reason) + if recovered, src, recErr := loadPBSDatastoreCfgFromInventory(stageRoot); recErr != nil { + logger.Warning("PBS staged apply: unable to recover datastore.cfg from inventory (%v); leaving current configuration unchanged", recErr) + return nil + } else if strings.TrimSpace(recovered) == "" { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s is empty; leaving current configuration unchanged", src) + return nil + } else { + normalized, fixed = normalizePBSDatastoreCfgContent(recovered) + if fixed > 0 { + logger.Warning("PBS staged apply: recovered datastore.cfg normalization fixed %d malformed line(s) (properties must be indented)", fixed) + } + blocks, err = parsePBSDatastoreCfgBlocks(normalized) + if err != nil { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s is still invalid (%v); leaving current configuration unchanged", src, err) + return nil + } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s still looks invalid (%s); leaving current configuration unchanged", src, reason) + return nil + } + logger.Info("PBS staged apply: datastore.cfg recovered from %s", src) + } + } + var applyBlocks []pbsDatastoreBlock var deferred []pbsDatastoreBlock for _, b := range blocks { @@ -213,6 +240,90 @@ func applyPBSDatastoreCfgFromStage(ctx context.Context, logger *logging.Logger, return nil } +type pbsDatastoreInventoryRestoreLite struct { + Files map[string]struct { + Content string `json:"content"` + } `json:"files"` + Datastores []struct { + Name string `json:"name"` + Path string `json:"path"` + Comment string `json:"comment"` + } `json:"datastores"` +} + +func loadPBSDatastoreCfgFromInventory(stageRoot string) (string, string, error) { + inventoryPath := filepath.Join(stageRoot, "var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json") + raw, err := restoreFS.ReadFile(inventoryPath) + if err != nil { + return "", "", fmt.Errorf("read inventory %s: %w", inventoryPath, err) + } + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" { + return "", "", fmt.Errorf("inventory %s is empty", inventoryPath) + } + + var report pbsDatastoreInventoryRestoreLite + if err := json.Unmarshal([]byte(trimmed), &report); err != nil { + return "", "", fmt.Errorf("parse inventory %s: %w", inventoryPath, err) + } + + if report.Files != nil { + if snap := strings.TrimSpace(report.Files["pbs_datastore_cfg"].Content); snap 
!= "" { + return report.Files["pbs_datastore_cfg"].Content, "pbs_datastore_inventory.json.files[pbs_datastore_cfg].content", nil + } + } + + // Fallback: generate a minimal datastore.cfg from the inventory's datastore list. + var out strings.Builder + for _, ds := range report.Datastores { + name := strings.TrimSpace(ds.Name) + path := strings.TrimSpace(ds.Path) + if name == "" || path == "" { + continue + } + if out.Len() > 0 { + out.WriteString("\n") + } + out.WriteString(fmt.Sprintf("datastore: %s\n", name)) + if comment := strings.TrimSpace(ds.Comment); comment != "" { + out.WriteString(fmt.Sprintf(" comment %s\n", comment)) + } + out.WriteString(fmt.Sprintf(" path %s\n", path)) + } + + generated := strings.TrimSpace(out.String()) + if generated == "" { + return "", "", fmt.Errorf("inventory %s contains no usable datastore definitions", inventoryPath) + } + return out.String(), "pbs_datastore_inventory.json.datastores", nil +} + +func detectPBSDatastoreCfgDuplicateKeys(blocks []pbsDatastoreBlock) string { + for _, block := range blocks { + seen := map[string]int{} + for _, line := range block.Lines { + trimmed := strings.TrimSpace(line) + if trimmed == "" || strings.HasPrefix(trimmed, "#") || strings.HasPrefix(trimmed, "datastore:") { + continue + } + + fields := strings.Fields(trimmed) + if len(fields) == 0 { + continue + } + key := strings.TrimSpace(fields[0]) + if key == "" { + continue + } + seen[key]++ + if seen[key] > 1 { + return fmt.Sprintf("datastore %s has duplicate key %q", strings.TrimSpace(block.Name), key) + } + } + } + return "" +} + func parsePBSDatastoreCfgBlocks(content string) ([]pbsDatastoreBlock, error) { var blocks []pbsDatastoreBlock var current *pbsDatastoreBlock diff --git a/internal/orchestrator/pbs_staged_apply_test.go b/internal/orchestrator/pbs_staged_apply_test.go index ecb7abb..0ee7ab7 100644 --- a/internal/orchestrator/pbs_staged_apply_test.go +++ b/internal/orchestrator/pbs_staged_apply_test.go @@ -3,6 +3,7 @@ package orchestrator import ( "context" "os" + "strings" "testing" ) @@ -76,3 +77,71 @@ func TestShouldApplyPBSDatastoreBlock_AllowsMountLikePathsOnRootFS(t *testing.T) t.Fatalf("expected datastore block to be applied, got ok=false reason=%q", reason) } } + +func TestApplyPBSDatastoreCfgFromStage_RecoversFromInventoryWhenFlattened(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + stageRoot := "/stage" + + // This is a representative "flattened" datastore.cfg produced by an unsafe prefilter + // (headers separated from their respective properties). + staged := strings.Join([]string{ + "comment Local ext4 disk datastore", + "comment Synology NFS sync target", + "datastore: Data1", + "datastore: Synology-Archive", + "gc-schedule 05:00", + "gc-schedule 06:30", + "notification-mode notification-system", + "notification-mode notification-system", + "path /mnt/Synology_NFS/PBS_Backup", + "path /mnt/datastore/Data1", + "", + }, "\n") + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/datastore.cfg", []byte(staged), 0o640); err != nil { + t.Fatalf("write staged datastore.cfg: %v", err) + } + + // Inventory contains a verbatim snapshot of the original datastore.cfg, which should be preferred. 
+ inventory := `{"files":{"pbs_datastore_cfg":{"content":"datastore: Synology-Archive\n comment Synology NFS sync target\n gc-schedule 05:00\n notification-mode notification-system\n path /mnt/Synology_NFS/PBS_Backup\n\ndatastore: Data1\n comment Local ext4 disk datastore\n gc-schedule 06:30\n notification-mode notification-system\n path /mnt/datastore/Data1\n"}}}` + if err := fakeFS.WriteFile(stageRoot+"/var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", []byte(inventory), 0o640); err != nil { + t.Fatalf("write inventory: %v", err) + } + + if err := applyPBSDatastoreCfgFromStage(context.Background(), newTestLogger(), stageRoot); err != nil { + t.Fatalf("applyPBSDatastoreCfgFromStage error: %v", err) + } + + out, err := fakeFS.ReadFile("/etc/proxmox-backup/datastore.cfg") + if err != nil { + t.Fatalf("read restored datastore.cfg: %v", err) + } + + blocks, err := parsePBSDatastoreCfgBlocks(string(out)) + if err != nil { + t.Fatalf("parse restored datastore.cfg: %v", err) + } + if len(blocks) != 2 { + t.Fatalf("expected 2 datastore blocks, got %d", len(blocks)) + } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + t.Fatalf("restored datastore.cfg still has duplicate keys: %s", reason) + } + + // Verify the expected datastore paths are preserved. + paths := map[string]string{} + for _, b := range blocks { + paths[b.Name] = b.Path + } + if paths["Synology-Archive"] != "/mnt/Synology_NFS/PBS_Backup" { + t.Fatalf("Synology-Archive path=%q", paths["Synology-Archive"]) + } + if paths["Data1"] != "/mnt/datastore/Data1" { + t.Fatalf("Data1 path=%q", paths["Data1"]) + } +} From 7e41b753f3ef26030baa5579005f491a7d767962 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 11 Feb 2026 23:55:12 +0100 Subject: [PATCH 02/24] Add PBS notifications backup and API restore Add support for backing up PBS notifications and for applying PBS config via the proxmox-backup-manager API. Key changes: - Collector: add BackupPBSNotifications and BackupPBSNotificationsPriv flags, include notifications.cfg and notifications-priv.cfg in PBS collection logic, and write a notifications_summary.json report; update tests. - New file: collector_pbs_notifications_summary.go implements JSON summaries for notification targets, matchers and endpoints. - Config: add RESTORE_PBS_APPLY_MODE and RESTORE_PBS_STRICT settings with parsing and template defaults; add PBS notifications config flags to parsing and defaults. - Orchestrator: add pbs_api_apply.go implementing API-based restore/apply of PBS categories (with strict 1:1 reconciliation and an auto fallback to file-based apply) and wire collector overrides for notifications. - Categories: update PBS category definitions to include command outputs and notification-related files. These changes enable selective backup of PBS notification data and provide an API-first restore path (with configurable behavior and strict reconciliation option). 
--- docs/BACKUP_ENV_MAPPING.md | 4 + docs/CLI_REFERENCE.md | 7 + docs/CONFIGURATION.md | 88 +- docs/RESTORE_DIAGRAMS.md | 2 + docs/RESTORE_GUIDE.md | 48 +- docs/RESTORE_TECHNICAL.md | 26 + internal/backup/collector.go | 24 +- internal/backup/collector_pbs.go | 21 +- .../collector_pbs_commands_coverage_test.go | 23 +- .../collector_pbs_notifications_summary.go | 220 +++++ internal/config/config.go | 42 +- internal/config/templates/backup.env | 13 + internal/orchestrator/categories.go | 175 ++-- internal/orchestrator/orchestrator.go | 2 + internal/orchestrator/pbs_api_apply.go | 799 ++++++++++++++++++ .../pbs_notifications_api_apply.go | 242 ++++++ .../pbs_notifications_api_apply_test.go | 63 ++ internal/orchestrator/pbs_staged_apply.go | 143 +++- .../orchestrator/restore_notifications.go | 25 +- internal/orchestrator/restore_workflow_ui.go | 24 +- 20 files changed, 1846 insertions(+), 145 deletions(-) create mode 100644 internal/backup/collector_pbs_notifications_summary.go create mode 100644 internal/orchestrator/pbs_api_apply.go create mode 100644 internal/orchestrator/pbs_notifications_api_apply.go create mode 100644 internal/orchestrator/pbs_notifications_api_apply_test.go diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index 9d1057e..aefe46a 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -88,12 +88,16 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. +RESTORE_PBS_APPLY_MODE = NEW (Go-only) → Restore: apply staged PBS configuration using `file`, `api`, or `auto` (default: `auto`). +RESTORE_PBS_STRICT = NEW (Go-only) → Restore: when API apply is used, remove PBS objects not present in the backup (1:1 reconciliation; destructive). BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). BACKUP_PBS_ACME_ACCOUNTS = NEW (Go-only) → Collect `acme/accounts.cfg` and ACME account snapshots (PBS). BACKUP_PBS_ACME_PLUGINS = NEW (Go-only) → Collect `acme/plugins.cfg` and ACME plugin snapshots (PBS). BACKUP_PBS_METRIC_SERVERS = NEW (Go-only) → Collect `metricserver.cfg` (PBS). BACKUP_PBS_TRAFFIC_CONTROL = NEW (Go-only) → Collect `traffic-control.cfg` and traffic-control snapshots (PBS). +BACKUP_PBS_NOTIFICATIONS = NEW (Go-only) → Collect `notifications.cfg` and notification snapshots (PBS). +BACKUP_PBS_NOTIFICATIONS_PRIV = NEW (Go-only) → Collect `notifications-priv.cfg` (PBS notification secrets/credentials). BACKUP_PBS_NETWORK_CONFIG = NEW (Go-only) → Collect `network.cfg` and network snapshots (PBS), independent from BACKUP_NETWORK_CONFIGS (system). 
## Renamed variables / Supported aliases in Go diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index 4aea693..75c0374 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -789,6 +789,13 @@ CONFIG_FILE=/etc/pbs/prod.env ./build/proxsave # Force dry-run mode DRY_RUN=true ./build/proxsave +# PBS restore behavior (optional) +# Prefer API-based apply for PBS staged categories (falls back when RESTORE_PBS_APPLY_MODE=auto) +RESTORE_PBS_APPLY_MODE=api ./build/proxsave --restore + +# Strict 1:1 reconciliation for PBS (WARNING: destructive) +RESTORE_PBS_STRICT=true RESTORE_PBS_APPLY_MODE=api ./build/proxsave --restore + # Set debug level DEBUG_LEVEL=extreme ./build/proxsave --log-level debug diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 33deac6..6f09263 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -6,6 +6,7 @@ Complete reference for all 200+ configuration variables in `configs/backup.env`. - [Configuration File Location](#configuration-file-location) - [General Settings](#general-settings) +- [Restore (PBS)](#restore-pbs) - [Security Settings](#security-settings) - [Disk Space](#disk-space) - [Storage Paths](#storage-paths) @@ -74,6 +75,48 @@ PROFILING_ENABLED=true # true | false (profiles written under LOG_PA --- +## Restore (PBS) + +These options affect **restore behavior on PBS hosts only**. + +```bash +# How to apply PBS configuration during restore: +# - file: restore staged *.cfg files to /etc/proxmox-backup (legacy behavior) +# - api: apply via proxmox-backup-manager where possible +# - auto: prefer API; fall back to file-based apply on failures +RESTORE_PBS_APPLY_MODE=auto # file | api | auto + +# When true, remove PBS objects not present in the backup (1:1 reconciliation). +# WARNING: Destructive when used with api/auto (it may delete existing objects). +RESTORE_PBS_STRICT=false # true | false +``` + +### RESTORE_PBS_APPLY_MODE + +- `file`: Always restores by applying staged files under `/tmp/proxsave/restore-stage-*` back to `/etc/proxmox-backup`. +- `api`: Prefers **API-based apply** via `proxmox-backup-manager` (fails if the API apply is unavailable or errors). +- `auto` (default): Tries `api` first and falls back to `file` on failures (service start failures, missing `proxmox-backup-manager`, command errors). + +**Current API coverage** (when `api`/`auto`): +- Node + traffic control (`pbs_host`) +- Datastores + S3 endpoints (`datastore_pbs`) +- Remotes (`pbs_remotes`) +- Jobs (sync/verify/prune) (`pbs_jobs`) +- Notifications endpoints/matchers (`pbs_notifications`) + +Configs with file-only apply remain file-based (e.g. access control, tape, proxy/ACME/metricserver). + +### RESTORE_PBS_STRICT (1:1) + +When `true` and **API apply** is used, ProxSave attempts a 1:1 reconciliation by removing objects that exist on the restore host but are **not present in the backup** (for the supported PBS categories above). + +Use cases: +- Disaster recovery or rebuild where the goal is **restore 1:1** on a clean PBS install. + +Avoid enabling `RESTORE_PBS_STRICT=true` for migrations or partial restores unless you explicitly want ProxSave to delete existing PBS objects. 
+ +--- + ## Security Settings ```bash @@ -321,7 +364,29 @@ PREFILTER_MAX_FILE_SIZE_MB=8 # Skip prefilter for files >8MB - **Smart chunking**: Splits large files for parallel processing - **Deduplication**: Detects duplicate data blocks (reduces storage) -- **Prefilter**: Analyzes small files before compression (optimizes algorithm selection) +- **Prefilter**: Applies safe, semantic-preserving normalization to small text/JSON files to improve compression (e.g. removes CR from CRLF line endings and minifies JSON). It does **not** reorder, de-indent, or strip structured configuration files, and it avoids touching Proxmox/PBS structured config paths (e.g. `etc/pve/**`, `etc/proxmox-backup/**`). + +### Prefilter (`ENABLE_PREFILTER`) — details and risks + +**What it does** (on the *staged* backup tree, before compression): +- Removes `\r` from CRLF text files (`.txt`, `.log`, `.md`, `.conf`, `.cfg`, `.ini`) to normalize line endings +- Minifies JSON (`.json`) while keeping valid JSON semantics + +**What it does not do**: +- It does **not** reorder lines, remove indentation, or otherwise rewrite whitespace/ordering-sensitive structured configs. +- It does **not** follow symlinks (symlinks are skipped). +- It skips Proxmox/PBS structured configuration paths where formatting/order matters, such as: + - `etc/pve/**` + - `etc/proxmox-backup/**` + - `etc/systemd/system/**` + - `etc/ssh/**` + - `etc/pam.d/**` + +**Why you might disable it** (even though it's safe): +- If you need maximum fidelity (bit-for-bit) of text/JSON formatting as originally collected (CRLF preservation, JSON pretty-printing, etc.) +- If you prefer the most conservative pipeline possible (forensics/compliance) + +**Important**: Prefilter never edits files on the host system — it only operates on the temporary staging directory that will be archived. --- @@ -938,6 +1003,24 @@ BACKUP_VM_CONFIGS=true # VM/CT config files # PBS datastore configs BACKUP_DATASTORE_CONFIGS=true # Datastore definitions +# S3 endpoints (used by S3 datastores) +BACKUP_PBS_S3_ENDPOINTS=true # s3.cfg (S3 endpoints, used by S3 datastores) + +# Node/global config +BACKUP_PBS_NODE_CONFIG=true # node.cfg (global PBS settings) + +# ACME +BACKUP_PBS_ACME_ACCOUNTS=true # acme/accounts.cfg +BACKUP_PBS_ACME_PLUGINS=true # acme/plugins.cfg + +# Integrations +BACKUP_PBS_METRIC_SERVERS=true # metricserver.cfg +BACKUP_PBS_TRAFFIC_CONTROL=true # traffic-control.cfg + +# Notifications +BACKUP_PBS_NOTIFICATIONS=true # notifications.cfg (targets/matchers/endpoints) +BACKUP_PBS_NOTIFICATIONS_PRIV=true # notifications-priv.cfg (secrets/credentials for endpoints) + # User and permissions BACKUP_USER_CONFIGS=true # PBS users and tokens @@ -953,6 +1036,9 @@ BACKUP_VERIFICATION_JOBS=true # Backup verification schedules # Tape backup BACKUP_TAPE_CONFIGS=true # Tape library configuration +# Network configuration (PBS) +BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP_NETWORK_CONFIGS (system) + # Prune schedules BACKUP_PRUNE_SCHEDULES=true # Retention prune schedules diff --git a/docs/RESTORE_DIAGRAMS.md b/docs/RESTORE_DIAGRAMS.md index 5007fb8..674db02 100644 --- a/docs/RESTORE_DIAGRAMS.md +++ b/docs/RESTORE_DIAGRAMS.md @@ -138,6 +138,8 @@ flowchart TD style CheckboxMenu fill:#87CEEB ``` +**Note (PBS)**: Staged PBS categories can be applied either by writing staged `*.cfg` files back to `/etc/proxmox-backup` or via `proxmox-backup-manager`, depending on `RESTORE_PBS_APPLY_MODE`. 
+ --- ## Service Management Flow diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index bbc30aa..4cfdc3e 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -86,7 +86,7 @@ Restore operations are organized into **20–22 categories** (PBS = 20, PVE = 22 Each category is handled in one of three ways: - **Normal**: extracted directly to `/` (system paths) after safety backup -- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or `pvesh`); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) +- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) - **Export-only**: extracted to an export directory for manual review (never written to system paths) ### PVE-Specific Categories (11 categories) @@ -107,17 +107,19 @@ Each category is handled in one of three ways: ### PBS-Specific Categories (9 categories) +**PBS staged apply mode**: For staged PBS categories, the apply method is controlled by `RESTORE_PBS_APPLY_MODE` (`auto` by default). When using `api`/`auto`, ProxSave applies supported PBS categories via `proxmox-backup-manager` and can optionally do strict 1:1 reconciliation with `RESTORE_PBS_STRICT=true`. + | Category | Name | Description | Paths | |----------|------|-------------|-------| | `pbs_config` | PBS Config Export | **Export-only** copy of /etc/proxmox-backup (never written to system) | `./etc/proxmox-backup/` | -| `pbs_host` | PBS Host & Integrations | **Staged** node settings, ACME, proxy, metric servers and traffic control | `./etc/proxmox-backup/node.cfg`
`./etc/proxmox-backup/proxy.cfg`
`./etc/proxmox-backup/acme/accounts.cfg`
`./etc/proxmox-backup/acme/plugins.cfg`
`./etc/proxmox-backup/metricserver.cfg`
`./etc/proxmox-backup/traffic-control.cfg` | -| `datastore_pbs` | PBS Datastore Configuration | **Staged** datastore definitions (incl. S3 endpoints) | `./etc/proxmox-backup/datastore.cfg`
`./etc/proxmox-backup/s3.cfg` | +| `pbs_host` | PBS Host & Integrations | **Staged** node settings, ACME, proxy, metric servers and traffic control (API/file apply) | `./etc/proxmox-backup/node.cfg`
`./etc/proxmox-backup/proxy.cfg`
`./etc/proxmox-backup/acme/accounts.cfg`
`./etc/proxmox-backup/acme/plugins.cfg`
`./etc/proxmox-backup/metricserver.cfg`
`./etc/proxmox-backup/traffic-control.cfg`
`./var/lib/proxsave-info/commands/pbs/node_config.json`
`./var/lib/proxsave-info/commands/pbs/acme_accounts.json`
`./var/lib/proxsave-info/commands/pbs/acme_plugins.json`
`./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json`
`./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json`
`./var/lib/proxsave-info/commands/pbs/traffic_control.json` | +| `datastore_pbs` | PBS Datastore Configuration | **Staged** datastore definitions (incl. S3 endpoints) (API/file apply) | `./etc/proxmox-backup/datastore.cfg`
`./etc/proxmox-backup/s3.cfg`
`./var/lib/proxsave-info/commands/pbs/datastore_list.json`
`./var/lib/proxsave-info/commands/pbs/datastore_*_status.json`
`./var/lib/proxsave-info/commands/pbs/s3_endpoints.json`
`./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json`
`./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json` | | `maintenance_pbs` | PBS Maintenance | Maintenance settings | `./etc/proxmox-backup/maintenance.cfg` | -| `pbs_jobs` | PBS Jobs | **Staged** sync/verify/prune jobs | `./etc/proxmox-backup/sync.cfg`
`./etc/proxmox-backup/verification.cfg`
`./etc/proxmox-backup/prune.cfg` | -| `pbs_remotes` | PBS Remotes | **Staged** remotes for sync/verify (may include credentials) | `./etc/proxmox-backup/remote.cfg` | -| `pbs_notifications` | PBS Notifications | **Staged** notification targets and matchers | `./etc/proxmox-backup/notifications.cfg`
`./etc/proxmox-backup/notifications-priv.cfg` | -| `pbs_access_control` | PBS Access Control | **Staged** access control + secrets restored 1:1 (root@pam safety rail) | `./etc/proxmox-backup/user.cfg`
`./etc/proxmox-backup/domains.cfg`
`./etc/proxmox-backup/acl.cfg`
`./etc/proxmox-backup/token.cfg`
`./etc/proxmox-backup/shadow.json`
`./etc/proxmox-backup/token.shadow`
`./etc/proxmox-backup/tfa.json` | -| `pbs_tape` | PBS Tape Backup | **Staged** tape config, jobs and encryption keys | `./etc/proxmox-backup/tape.cfg`
`./etc/proxmox-backup/tape-job.cfg`
`./etc/proxmox-backup/media-pool.cfg`
`./etc/proxmox-backup/tape-encryption-keys.json` | +| `pbs_jobs` | PBS Jobs | **Staged** sync/verify/prune jobs (API/file apply) | `./etc/proxmox-backup/sync.cfg`
`./etc/proxmox-backup/verification.cfg`
`./etc/proxmox-backup/prune.cfg`
`./var/lib/proxsave-info/commands/pbs/sync_jobs.json`
`./var/lib/proxsave-info/commands/pbs/verification_jobs.json`
`./var/lib/proxsave-info/commands/pbs/prune_jobs.json`
`./var/lib/proxsave-info/commands/pbs/gc_jobs.json` | +| `pbs_remotes` | PBS Remotes | **Staged** remotes for sync/verify (may include credentials) (API/file apply) | `./etc/proxmox-backup/remote.cfg`
`./var/lib/proxsave-info/commands/pbs/remote_list.json` | +| `pbs_notifications` | PBS Notifications | **Staged** notification targets and matchers (API/file apply) | `./etc/proxmox-backup/notifications.cfg`
`./etc/proxmox-backup/notifications-priv.cfg`
`./var/lib/proxsave-info/commands/pbs/notification_targets.json`
`./var/lib/proxsave-info/commands/pbs/notification_matchers.json`
`./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json` | +| `pbs_access_control` | PBS Access Control | **Staged** access control + secrets restored 1:1 (root@pam safety rail) | `./etc/proxmox-backup/user.cfg`
`./etc/proxmox-backup/domains.cfg`
`./etc/proxmox-backup/acl.cfg`
`./etc/proxmox-backup/token.cfg`
`./etc/proxmox-backup/shadow.json`
`./etc/proxmox-backup/token.shadow`
`./etc/proxmox-backup/tfa.json`
`./var/lib/proxsave-info/commands/pbs/user_list.json`
`./var/lib/proxsave-info/commands/pbs/realms_ldap.json`
`./var/lib/proxsave-info/commands/pbs/realms_ad.json`
`./var/lib/proxsave-info/commands/pbs/realms_openid.json`
`./var/lib/proxsave-info/commands/pbs/acl_list.json` | +| `pbs_tape` | PBS Tape Backup | **Staged** tape config, jobs and encryption keys | `./etc/proxmox-backup/tape.cfg`
`./etc/proxmox-backup/tape-job.cfg`
`./etc/proxmox-backup/media-pool.cfg`
`./etc/proxmox-backup/tape-encryption-keys.json`
`./var/lib/proxsave-info/commands/pbs/tape_drives.json`
`./var/lib/proxsave-info/commands/pbs/tape_changers.json`
`./var/lib/proxsave-info/commands/pbs/tape_pools.json` | ### Common Categories (11 categories) @@ -225,10 +227,10 @@ Select restore mode: - `zfs` - ZFS configuration **PBS Categories**: -- `datastore_pbs` - Datastore definitions (staged apply) +- `datastore_pbs` - Datastore definitions (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) - `maintenance_pbs` - Maintenance settings -- `pbs_jobs` - Sync/verify/prune jobs (staged apply) -- `pbs_remotes` - Remotes for sync jobs (staged apply) +- `pbs_jobs` - Sync/verify/prune jobs (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) +- `pbs_remotes` - Remotes for sync jobs (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) - `filesystem` - /etc/fstab - `storage_stack` - Storage stack config (mount prerequisites) - `zfs` - ZFS configuration @@ -2371,6 +2373,28 @@ systemctl restart proxmox-backup proxmox-backup-proxy --- +**Issue: "Bad Request (400) parsing /etc/proxmox-backup/datastore.cfg ... duplicate property 'gc-schedule'"** + +**Cause**: `datastore.cfg` is malformed (multiple datastore definitions merged into a single block). This typically happens if the file lost its structure (header/order/indentation), leading PBS to interpret keys like `gc-schedule`, `notification-mode`, or `path` as duplicated **within the same datastore**. + +**Restore behavior**: +- ProxSave detects this condition during staged apply. +- If `var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json` is available in the backup, ProxSave will use its embedded snapshot of the original `datastore.cfg` to recover a valid configuration. +- If recovery is not possible, ProxSave will **leave the existing** `/etc/proxmox-backup/datastore.cfg` unchanged to avoid breaking PBS. + +**Manual diagnosis**: +```bash +nl -ba /etc/proxmox-backup/datastore.cfg | sed -n '1,120p' + +# Look for duplicate keys inside the same datastore block: +awk ' +/^datastore: /{ds=$2; delete seen} +/^[[:space:]]*[A-Za-z0-9-]+[[:space:]]+/{key=$1; if(seen[key]++) printf "DUP datastore=%s key=%s line=%d: %s\n", ds, key, NR, $0} +' /etc/proxmox-backup/datastore.cfg +``` + +--- + **Issue: "unable to read prune/verification job config ... syntax error (expected header)"** **Cause**: PBS job config files (`/etc/proxmox-backup/prune.cfg`, `/etc/proxmox-backup/verification.cfg`) are empty or malformed. PBS expects a section header at the first non-comment line; an empty file can trigger parse errors. @@ -2671,7 +2695,7 @@ tar -xzf /path/to/decrypted.tar.gz ./specific/file/path A: Yes: - **Extraction**: ProxSave preserves UID/GID, mode bits and timestamps (mtime/atime) for extracted entries. -- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). 
+- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. - **ctime**: Cannot be set (kernel-managed). --- diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index c7da91b..1192f67 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -869,6 +869,30 @@ func extractSelectiveArchive( --- +#### Phase 10: Staged Apply (PVE/PBS) + +After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*`. + +**PBS staged apply**: +- Controlled by `RESTORE_PBS_APPLY_MODE` (`file` | `api` | `auto`) and `RESTORE_PBS_STRICT`. +- `file`: applies the staged `*.cfg` files back to `/etc/proxmox-backup` (legacy behavior). +- `api`: applies supported PBS categories via `proxmox-backup-manager` (create/update/remove, with optional strict 1:1 reconciliation). +- `auto` (default): prefers `api`, falls back to `file` on failures (e.g. services cannot be started, missing CLI binary, command errors). + +**Current PBS API coverage** (when `api`/`auto`): +- `pbs_host`: node + traffic control +- `datastore_pbs`: datastores + S3 endpoints +- `pbs_remotes`: remotes +- `pbs_jobs`: sync/verify/prune jobs +- `pbs_notifications`: notification endpoints/matchers + +Other PBS categories remain file-based (e.g. access control, tape, proxy/ACME/metricserver). + +**Key code paths**: +- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`) +- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pbs_notifications`) +- `internal/orchestrator/pbs_api_apply.go` / `internal/orchestrator/pbs_notifications_api_apply.go` (API apply engines) + ## Category System ### Category Definition Structure @@ -1072,6 +1096,8 @@ func shouldStopPBSServices(categories []Category) bool { } ``` +**API apply note**: When `RESTORE_PBS_APPLY_MODE` is `api`/`auto`, ProxSave may start PBS services again during the **staged apply** phase to run `proxmox-backup-manager` commands (even if services were stopped earlier for safe file extraction). 
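+
+The `auto` fallback can be pictured as a small decision helper. The sketch below is illustrative only; the helper name and signature (`applyStagedPBSCategory`, `apiApply`, `fileApply`) are hypothetical and are not the actual functions in `pbs_api_apply.go`:
+
+```go
+// Illustrative sketch (hypothetical names): "auto" prefers the API apply and
+// falls back to the file-based apply when the API path fails (e.g. missing
+// proxmox-backup-manager, service start failures, command errors).
+func applyStagedPBSCategory(mode string, apiApply, fileApply func() error) error {
+	switch mode {
+	case "api":
+		return apiApply() // fail if the API apply is unavailable or errors
+	case "file":
+		return fileApply()
+	default: // "auto"
+		if err := apiApply(); err != nil {
+			return fileApply()
+		}
+		return nil
+	}
+}
+```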
+ ### Error Handling Philosophy **Stop Phase**: **FAIL-FAST** diff --git a/internal/backup/collector.go b/internal/backup/collector.go index 8fb0d5a..676a9f8 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -162,6 +162,8 @@ type CollectorConfig struct { BackupPBSAcmePlugins bool BackupPBSMetricServers bool BackupPBSTrafficControl bool + BackupPBSNotifications bool + BackupPBSNotificationsPriv bool BackupUserConfigs bool BackupRemoteConfigs bool BackupSyncJobs bool @@ -251,7 +253,7 @@ func (c *CollectorConfig) Validate() error { c.BackupPVEBackupFiles || c.BackupCephConfig || c.BackupDatastoreConfigs || c.BackupPBSS3Endpoints || c.BackupPBSNodeConfig || c.BackupPBSAcmeAccounts || c.BackupPBSAcmePlugins || c.BackupPBSMetricServers || - c.BackupPBSTrafficControl || c.BackupUserConfigs || c.BackupRemoteConfigs || + c.BackupPBSTrafficControl || c.BackupPBSNotifications || c.BackupUserConfigs || c.BackupRemoteConfigs || c.BackupSyncJobs || c.BackupVerificationJobs || c.BackupTapeConfigs || c.BackupPBSNetworkConfig || c.BackupPruneSchedules || c.BackupPxarFiles || c.BackupNetworkConfigs || c.BackupAptSources || c.BackupCronJobs || @@ -334,15 +336,17 @@ func GetDefaultCollectorConfig() *CollectorConfig { BackupDatastoreConfigs: true, BackupPBSS3Endpoints: true, BackupPBSNodeConfig: true, - BackupPBSAcmeAccounts: true, - BackupPBSAcmePlugins: true, - BackupPBSMetricServers: true, - BackupPBSTrafficControl: true, - BackupUserConfigs: true, - BackupRemoteConfigs: true, - BackupSyncJobs: true, - BackupVerificationJobs: true, - BackupTapeConfigs: true, + BackupPBSAcmeAccounts: true, + BackupPBSAcmePlugins: true, + BackupPBSMetricServers: true, + BackupPBSTrafficControl: true, + BackupPBSNotifications: true, + BackupPBSNotificationsPriv: true, + BackupUserConfigs: true, + BackupRemoteConfigs: true, + BackupSyncJobs: true, + BackupVerificationJobs: true, + BackupTapeConfigs: true, BackupPBSNetworkConfig: true, BackupPruneSchedules: true, BackupPxarFiles: true, diff --git a/internal/backup/collector_pbs.go b/internal/backup/collector_pbs.go index 6759b00..98212e9 100644 --- a/internal/backup/collector_pbs.go +++ b/internal/backup/collector_pbs.go @@ -214,6 +214,11 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro if !c.config.BackupPBSTrafficControl { extraExclude = append(extraExclude, "traffic-control.cfg") } + if !c.config.BackupPBSNotifications { + extraExclude = append(extraExclude, "notifications.cfg", "notifications-priv.cfg") + } else if !c.config.BackupPBSNotificationsPriv { + extraExclude = append(extraExclude, "notifications-priv.cfg") + } if !c.config.BackupUserConfigs { // User-related configs are intentionally excluded together. 
extraExclude = append(extraExclude, "user.cfg", "acl.cfg", "domains.cfg") @@ -281,6 +286,17 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro c.pbsManifest["traffic-control.cfg"] = c.collectPBSConfigFile(ctx, root, "traffic-control.cfg", "Traffic control rules", c.config.BackupPBSTrafficControl, "BACKUP_PBS_TRAFFIC_CONTROL") + // Notifications (targets/endpoints + matcher routing; secrets are stored in notifications-priv.cfg) + c.pbsManifest["notifications.cfg"] = c.collectPBSConfigFile(ctx, root, "notifications.cfg", + "Notifications configuration", c.config.BackupPBSNotifications, "BACKUP_PBS_NOTIFICATIONS") + privEnabled := c.config.BackupPBSNotifications && c.config.BackupPBSNotificationsPriv + privDisableHint := "BACKUP_PBS_NOTIFICATIONS_PRIV" + if !c.config.BackupPBSNotifications { + privDisableHint = "BACKUP_PBS_NOTIFICATIONS" + } + c.pbsManifest["notifications-priv.cfg"] = c.collectPBSConfigFile(ctx, root, "notifications-priv.cfg", + "Notifications secrets", privEnabled, privDisableHint) + // User configuration c.pbsManifest["user.cfg"] = c.collectPBSConfigFile(ctx, root, "user.cfg", "User configuration", c.config.BackupUserConfigs, "BACKUP_USER_CONFIGS") @@ -381,7 +397,10 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // Notifications (targets, matchers, endpoints) - c.collectPBSNotificationSnapshots(ctx, commandsDir) + if c.config.BackupPBSNotifications { + c.collectPBSNotificationSnapshots(ctx, commandsDir) + c.writePBSNotificationSummary(commandsDir) + } // User list if c.config.BackupUserConfigs { diff --git a/internal/backup/collector_pbs_commands_coverage_test.go b/internal/backup/collector_pbs_commands_coverage_test.go index 9cac09a..28bb64c 100644 --- a/internal/backup/collector_pbs_commands_coverage_test.go +++ b/internal/backup/collector_pbs_commands_coverage_test.go @@ -50,17 +50,18 @@ func TestCollectPBSCommandsWritesExpectedOutputs(t *testing.T) { "datastore_store1_status.json", "acme_accounts.json", "acme_plugins.json", - "notification_targets.json", - "notification_matchers.json", - "notification_endpoints_smtp.json", - "notification_endpoints_sendmail.json", - "notification_endpoints_gotify.json", - "notification_endpoints_webhook.json", - "user_list.json", - "realms_ldap.json", - "realms_ad.json", - "realms_openid.json", - "acl_list.json", + "notification_targets.json", + "notification_matchers.json", + "notification_endpoints_smtp.json", + "notification_endpoints_sendmail.json", + "notification_endpoints_gotify.json", + "notification_endpoints_webhook.json", + "notifications_summary.json", + "user_list.json", + "realms_ldap.json", + "realms_ad.json", + "realms_openid.json", + "acl_list.json", "remote_list.json", "sync_jobs.json", "verification_jobs.json", diff --git a/internal/backup/collector_pbs_notifications_summary.go b/internal/backup/collector_pbs_notifications_summary.go new file mode 100644 index 0000000..180e163 --- /dev/null +++ b/internal/backup/collector_pbs_notifications_summary.go @@ -0,0 +1,220 @@ +package backup + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +type pbsNotificationSnapshotSummary struct { + Present bool `json:"present"` + Bytes int64 `json:"bytes,omitempty"` + Total int `json:"total,omitempty"` + BuiltIn int `json:"built_in,omitempty"` + Custom int `json:"custom,omitempty"` + Names []string `json:"names,omitempty"` + Error string `json:"error,omitempty"` +} + +type pbsNotificationsConfigFilesSummary struct { + 
NotificationsCfg ManifestEntry `json:"notifications_cfg"` + NotificationsPrivCfg ManifestEntry `json:"notifications_priv_cfg"` +} + +type pbsNotificationsSummary struct { + GeneratedAt time.Time `json:"generated_at"` + Enabled bool `json:"enabled"` + PrivEnabled bool `json:"priv_enabled"` + + ConfigFiles *pbsNotificationsConfigFilesSummary `json:"config_files,omitempty"` + + Targets pbsNotificationSnapshotSummary `json:"targets"` + Matchers pbsNotificationSnapshotSummary `json:"matchers"` + Endpoints map[string]pbsNotificationSnapshotSummary `json:"endpoints"` + + Notes []string `json:"notes,omitempty"` + Warnings []string `json:"warnings,omitempty"` +} + +func (c *Collector) writePBSNotificationSummary(commandsDir string) { + if c == nil { + return + } + + summary := pbsNotificationsSummary{ + GeneratedAt: time.Now().UTC(), + Enabled: c.config != nil && c.config.BackupPBSNotifications, + PrivEnabled: c.config != nil && c.config.BackupPBSNotifications && c.config.BackupPBSNotificationsPriv, + Endpoints: make(map[string]pbsNotificationSnapshotSummary), + } + + if c.pbsManifest != nil { + summary.ConfigFiles = &pbsNotificationsConfigFilesSummary{ + NotificationsCfg: c.pbsManifest["notifications.cfg"], + NotificationsPrivCfg: c.pbsManifest["notifications-priv.cfg"], + } + } + + summary.Targets = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, "notification_targets.json")) + summary.Matchers = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, "notification_matchers.json")) + for _, typ := range []string{"smtp", "sendmail", "gotify", "webhook"} { + summary.Endpoints[typ] = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, fmt.Sprintf("notification_endpoints_%s.json", typ))) + } + + if summary.ConfigFiles != nil { + cfg := summary.ConfigFiles.NotificationsCfg + priv := summary.ConfigFiles.NotificationsPrivCfg + + if cfg.Status != StatusCollected && cfg.Status != StatusDisabled { + if summary.Targets.Total > 0 || sumEndpointTotals(summary.Endpoints) > 0 { + summary.Warnings = append(summary.Warnings, "Notification objects detected in snapshots, but notifications.cfg was not collected (check BACKUP_PBS_NOTIFICATIONS and exclusions).") + } + } + + if priv.Status == StatusDisabled { + summary.Notes = append(summary.Notes, "notifications-priv.cfg backup is disabled (BACKUP_PBS_NOTIFICATIONS_PRIV=false); endpoint credentials/secrets will not be included.") + } else if priv.Status != StatusCollected { + if summary.Targets.Custom > 0 || sumEndpointCustom(summary.Endpoints) > 0 { + summary.Warnings = append(summary.Warnings, "Custom notification endpoints/targets detected, but notifications-priv.cfg was not collected; restore may require re-entering secrets/credentials.") + } + } + } + + // Surface important mismatches in the console log too. 
+ if c.logger != nil { + c.logger.Info("PBS notifications snapshot summary: targets=%d matchers=%d endpoints=%d", + summary.Targets.Total, + summary.Matchers.Total, + sumEndpointTotals(summary.Endpoints), + ) + for _, note := range summary.Notes { + c.logger.Info("PBS notifications: %s", note) + } + for _, warning := range summary.Warnings { + c.logger.Warning("PBS notifications: %s", warning) + } + } + + out, err := json.MarshalIndent(summary, "", " ") + if err != nil { + c.logger.Debug("PBS notifications summary skipped: marshal error: %v", err) + return + } + + if err := c.writeReportFile(filepath.Join(commandsDir, "notifications_summary.json"), out); err != nil { + c.logger.Debug("PBS notifications summary write failed: %v", err) + } +} + +func sumEndpointTotals(endpoints map[string]pbsNotificationSnapshotSummary) int { + total := 0 + for _, s := range endpoints { + total += s.Total + } + return total +} + +func sumEndpointCustom(endpoints map[string]pbsNotificationSnapshotSummary) int { + total := 0 + for _, s := range endpoints { + total += s.Custom + } + return total +} + +func summarizePBSNotificationSnapshot(path string) pbsNotificationSnapshotSummary { + raw, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return pbsNotificationSnapshotSummary{Present: false} + } + return pbsNotificationSnapshotSummary{ + Present: false, + Error: err.Error(), + } + } + + summary := pbsNotificationSnapshotSummary{ + Present: true, + Bytes: int64(len(raw)), + } + + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" { + return summary + } + + var payload any + if err := json.Unmarshal([]byte(trimmed), &payload); err != nil { + summary.Error = fmt.Sprintf("invalid json: %v", err) + return summary + } + + // Unwrap proxmox-backup-manager JSON envelope (common shape: {"data":[...], ...}). 
+ if m, ok := payload.(map[string]any); ok { + if data, ok := m["data"]; ok { + payload = data + } + } + + items, ok := payload.([]any) + if !ok { + summary.Error = "unexpected json shape (expected list)" + return summary + } + + summary.Total = len(items) + + names := make([]string, 0, len(items)) + for _, item := range items { + entry, ok := item.(map[string]any) + if !ok { + continue + } + + name := firstString(entry, "name", "id", "target", "matcher") + if name != "" { + names = append(names, name) + } + + origin := strings.ToLower(strings.TrimSpace(firstString(entry, "origin"))) + switch { + case strings.Contains(origin, "built"): + summary.BuiltIn++ + case strings.Contains(origin, "custom"): + summary.Custom++ + } + } + + sort.Strings(names) + if len(names) > 100 { + names = names[:100] + } + if len(names) > 0 { + summary.Names = names + } + + return summary +} + +func firstString(entry map[string]any, keys ...string) string { + for _, key := range keys { + v, ok := entry[key] + if !ok { + continue + } + s, ok := v.(string) + if !ok { + continue + } + s = strings.TrimSpace(s) + if s != "" { + return s + } + } + return "" +} diff --git a/internal/config/config.go b/internal/config/config.go index 8c4cf28..7f07dad 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -206,6 +206,8 @@ type Config struct { BackupPBSAcmePlugins bool BackupPBSMetricServers bool BackupPBSTrafficControl bool + BackupPBSNotifications bool + BackupPBSNotificationsPriv bool BackupUserConfigs bool BackupRemoteConfigs bool BackupSyncJobs bool @@ -257,6 +259,15 @@ type Config struct { PBSPassword string // Auto-detected API token secret PBSFingerprint string // Auto-detected from PBS certificate + // Restore settings + // RestorePBSApplyMode controls how PBS config is applied during restore: + // - "file": write staged *.cfg files to /etc/proxmox-backup (legacy behavior) + // - "api": apply via proxmox-backup-manager / proxmox-tape where possible + // - "auto": prefer API; fall back to file-based apply on failures + RestorePBSApplyMode string + // RestorePBSStrict enables 1:1 reconciliation for PBS categories (remove items not present in backup). 
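+	// WARNING: destructive when combined with the api/auto apply modes (it may delete existing PBS objects).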
+ RestorePBSStrict bool + // raw configuration map raw map[string]string } @@ -292,14 +303,15 @@ func LoadConfig(configPath string) (*Config, error) { // This allows environment variables to take precedence over file configuration func (c *Config) loadEnvOverrides() { // List of all configuration keys that can be overridden by environment variables - envKeys := []string{ - "BACKUP_ENABLED", "DRY_RUN", "DEBUG_LEVEL", "USE_COLOR", "COLORIZE_STEP_LOGS", - "PROFILING_ENABLED", - "COMPRESSION_TYPE", "COMPRESSION_LEVEL", "COMPRESSION_THREADS", "COMPRESSION_MODE", - "ENABLE_SMART_CHUNKING", "ENABLE_DEDUPLICATION", "ENABLE_PREFILTER", - "CHUNK_SIZE_MB", "CHUNK_THRESHOLD_MB", "PREFILTER_MAX_FILE_SIZE_MB", - "BACKUP_PATH", "LOG_PATH", "LOCK_PATH", "SECURE_ACCOUNT", - "SECONDARY_ENABLED", "SECONDARY_PATH", "SECONDARY_LOG_PATH", + envKeys := []string{ + "BACKUP_ENABLED", "DRY_RUN", "DEBUG_LEVEL", "USE_COLOR", "COLORIZE_STEP_LOGS", + "PROFILING_ENABLED", + "RESTORE_PBS_APPLY_MODE", "RESTORE_PBS_STRICT", + "COMPRESSION_TYPE", "COMPRESSION_LEVEL", "COMPRESSION_THREADS", "COMPRESSION_MODE", + "ENABLE_SMART_CHUNKING", "ENABLE_DEDUPLICATION", "ENABLE_PREFILTER", + "CHUNK_SIZE_MB", "CHUNK_THRESHOLD_MB", "PREFILTER_MAX_FILE_SIZE_MB", + "BACKUP_PATH", "LOG_PATH", "LOCK_PATH", "SECURE_ACCOUNT", + "SECONDARY_ENABLED", "SECONDARY_PATH", "SECONDARY_LOG_PATH", "CLOUD_ENABLED", "CLOUD_REMOTE", "CLOUD_REMOTE_PATH", "CLOUD_LOG_PATH", "CLOUD_UPLOAD_MODE", "CLOUD_PARALLEL_MAX_JOBS", "CLOUD_PARALLEL_VERIFICATION", "CLOUD_WRITE_HEALTHCHECK", @@ -347,6 +359,7 @@ func (c *Config) parse() error { if err := c.parseCollectionSettings(); err != nil { return err } + c.parseRestoreSettings() c.autoDetectPBSAuth() return nil } @@ -365,6 +378,17 @@ func (c *Config) parseGeneralSettings() { c.ColorizeStepLogs = c.getBool("COLORIZE_STEP_LOGS", true) && c.UseColor } +func (c *Config) parseRestoreSettings() { + mode := strings.ToLower(strings.TrimSpace(c.getString("RESTORE_PBS_APPLY_MODE", "auto"))) + switch mode { + case "file", "api", "auto": + default: + mode = "auto" + } + c.RestorePBSApplyMode = mode + c.RestorePBSStrict = c.getBool("RESTORE_PBS_STRICT", false) +} + func (c *Config) parseCompressionSettings() { c.CompressionType = normalizeCompressionType(c.getCompressionType("COMPRESSION_TYPE", types.CompressionXZ)) c.CompressionLevel = c.getInt("COMPRESSION_LEVEL", 6) @@ -680,6 +704,8 @@ func (c *Config) parsePBSSettings() { c.BackupPBSAcmePlugins = c.getBool("BACKUP_PBS_ACME_PLUGINS", true) c.BackupPBSMetricServers = c.getBool("BACKUP_PBS_METRIC_SERVERS", true) c.BackupPBSTrafficControl = c.getBool("BACKUP_PBS_TRAFFIC_CONTROL", true) + c.BackupPBSNotifications = c.getBool("BACKUP_PBS_NOTIFICATIONS", true) + c.BackupPBSNotificationsPriv = c.getBool("BACKUP_PBS_NOTIFICATIONS_PRIV", c.BackupPBSNotifications) c.BackupUserConfigs = c.getBool("BACKUP_USER_CONFIGS", true) c.BackupRemoteConfigs = c.getBoolWithFallback([]string{"BACKUP_REMOTE_CONFIGS", "BACKUP_REMOTE_CFG"}, true) c.BackupSyncJobs = c.getBool("BACKUP_SYNC_JOBS", true) diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 7a7cd71..0c45e3e 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -14,6 +14,17 @@ COLORIZE_STEP_LOGS=true # Highlight "Step N/8" lines (requires USE_COLOR=tr DEBUG_LEVEL=standard # standard | advanced | extreme DRY_RUN=false # Set to false for real runs +# ---------------------------------------------------------------------- +# Restore (PBS) +# 
---------------------------------------------------------------------- +# How to apply PBS configuration during restore: +# - file: restore staged *.cfg files to /etc/proxmox-backup (legacy behavior) +# - api: apply via proxmox-backup-manager / proxmox-tape where possible +# - auto: prefer API; fall back to file-based apply on failures +RESTORE_PBS_APPLY_MODE=auto # file | api | auto +# When true, remove PBS objects not present in the backup (1:1 reconciliation). +RESTORE_PBS_STRICT=false + # ---------------------------------------------------------------------- # Security # ---------------------------------------------------------------------- @@ -296,6 +307,8 @@ BACKUP_PBS_ACME_ACCOUNTS=true # acme/accounts.cfg BACKUP_PBS_ACME_PLUGINS=true # acme/plugins.cfg BACKUP_PBS_METRIC_SERVERS=true # metricserver.cfg BACKUP_PBS_TRAFFIC_CONTROL=true # traffic-control.cfg +BACKUP_PBS_NOTIFICATIONS=true # notifications.cfg (targets/matchers/endpoints) +BACKUP_PBS_NOTIFICATIONS_PRIV=true # notifications-priv.cfg (secrets/credentials for endpoints) BACKUP_USER_CONFIGS=true BACKUP_REMOTE_CONFIGS=true BACKUP_SYNC_JOBS=true diff --git a/internal/orchestrator/categories.go b/internal/orchestrator/categories.go index 053bb83..4dda31e 100644 --- a/internal/orchestrator/categories.go +++ b/internal/orchestrator/categories.go @@ -166,30 +166,41 @@ func GetAllCategories() []Category { }, ExportOnly: true, }, - { - ID: "pbs_host", - Name: "PBS Host & Integrations", - Description: "Node settings, ACME configuration, proxy, external metric servers and traffic control rules", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/node.cfg", - "./etc/proxmox-backup/proxy.cfg", - "./etc/proxmox-backup/acme/accounts.cfg", - "./etc/proxmox-backup/acme/plugins.cfg", - "./etc/proxmox-backup/metricserver.cfg", - "./etc/proxmox-backup/traffic-control.cfg", + { + ID: "pbs_host", + Name: "PBS Host & Integrations", + Description: "Node settings, ACME configuration, proxy, external metric servers and traffic control rules", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/node.cfg", + "./etc/proxmox-backup/proxy.cfg", + "./etc/proxmox-backup/acme/accounts.cfg", + "./etc/proxmox-backup/acme/plugins.cfg", + "./etc/proxmox-backup/metricserver.cfg", + "./etc/proxmox-backup/traffic-control.cfg", + "./var/lib/proxsave-info/commands/pbs/node_config.json", + "./var/lib/proxsave-info/commands/pbs/acme_accounts.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugins.json", + "./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json", + "./var/lib/proxsave-info/commands/pbs/traffic_control.json", + }, }, - }, - { - ID: "datastore_pbs", - Name: "PBS Datastore Configuration", - Description: "Datastore definitions and settings (including S3 endpoint definitions)", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/datastore.cfg", - "./etc/proxmox-backup/s3.cfg", + { + ID: "datastore_pbs", + Name: "PBS Datastore Configuration", + Description: "Datastore definitions and settings (including S3 endpoint definitions)", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/datastore.cfg", + "./etc/proxmox-backup/s3.cfg", + "./var/lib/proxsave-info/commands/pbs/datastore_list.json", + "./var/lib/proxsave-info/commands/pbs/datastore_*_status.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoints.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json", + 
"./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", + }, }, - }, { ID: "maintenance_pbs", Name: "PBS Maintenance", @@ -199,63 +210,79 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/maintenance.cfg", }, }, - { - ID: "pbs_jobs", - Name: "PBS Jobs", - Description: "Sync, verify, and prune job configurations", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/sync.cfg", - "./etc/proxmox-backup/verification.cfg", - "./etc/proxmox-backup/prune.cfg", + { + ID: "pbs_jobs", + Name: "PBS Jobs", + Description: "Sync, verify, and prune job configurations", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/sync.cfg", + "./etc/proxmox-backup/verification.cfg", + "./etc/proxmox-backup/prune.cfg", + "./var/lib/proxsave-info/commands/pbs/sync_jobs.json", + "./var/lib/proxsave-info/commands/pbs/verification_jobs.json", + "./var/lib/proxsave-info/commands/pbs/prune_jobs.json", + "./var/lib/proxsave-info/commands/pbs/gc_jobs.json", + }, }, - }, - { - ID: "pbs_remotes", - Name: "PBS Remotes", - Description: "Remote definitions for sync/verify jobs (may include credentials)", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/remote.cfg", + { + ID: "pbs_remotes", + Name: "PBS Remotes", + Description: "Remote definitions for sync/verify jobs (may include credentials)", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/remote.cfg", + "./var/lib/proxsave-info/commands/pbs/remote_list.json", + }, }, - }, - { - ID: "pbs_notifications", - Name: "PBS Notifications", - Description: "Notification targets and matchers", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/notifications.cfg", - "./etc/proxmox-backup/notifications-priv.cfg", + { + ID: "pbs_notifications", + Name: "PBS Notifications", + Description: "Notification targets and matchers", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/notifications.cfg", + "./etc/proxmox-backup/notifications-priv.cfg", + "./var/lib/proxsave-info/commands/pbs/notification_targets.json", + "./var/lib/proxsave-info/commands/pbs/notification_matchers.json", + "./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json", + }, }, - }, - { - ID: "pbs_access_control", - Name: "PBS Access Control", - Description: "Users, realms and permissions", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/user.cfg", - "./etc/proxmox-backup/domains.cfg", - "./etc/proxmox-backup/acl.cfg", - "./etc/proxmox-backup/token.cfg", - "./etc/proxmox-backup/shadow.json", - "./etc/proxmox-backup/token.shadow", - "./etc/proxmox-backup/tfa.json", + { + ID: "pbs_access_control", + Name: "PBS Access Control", + Description: "Users, realms and permissions", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/user.cfg", + "./etc/proxmox-backup/domains.cfg", + "./etc/proxmox-backup/acl.cfg", + "./etc/proxmox-backup/token.cfg", + "./etc/proxmox-backup/shadow.json", + "./etc/proxmox-backup/token.shadow", + "./etc/proxmox-backup/tfa.json", + "./var/lib/proxsave-info/commands/pbs/user_list.json", + "./var/lib/proxsave-info/commands/pbs/realms_ldap.json", + "./var/lib/proxsave-info/commands/pbs/realms_ad.json", + "./var/lib/proxsave-info/commands/pbs/realms_openid.json", + "./var/lib/proxsave-info/commands/pbs/acl_list.json", + }, }, - }, - { - ID: "pbs_tape", - Name: "PBS Tape Backup", - Description: "Tape jobs, pools, changers and tape encryption keys", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/tape.cfg", - 
"./etc/proxmox-backup/tape-job.cfg", - "./etc/proxmox-backup/media-pool.cfg", - "./etc/proxmox-backup/tape-encryption-keys.json", + { + ID: "pbs_tape", + Name: "PBS Tape Backup", + Description: "Tape jobs, pools, changers and tape encryption keys", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/tape.cfg", + "./etc/proxmox-backup/tape-job.cfg", + "./etc/proxmox-backup/media-pool.cfg", + "./etc/proxmox-backup/tape-encryption-keys.json", + "./var/lib/proxsave-info/commands/pbs/tape_drives.json", + "./var/lib/proxsave-info/commands/pbs/tape_changers.json", + "./var/lib/proxsave-info/commands/pbs/tape_pools.json", + }, }, - }, // Common Categories { diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index ddab036..a0912d8 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1482,6 +1482,8 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { cc.BackupPBSAcmePlugins = cfg.BackupPBSAcmePlugins cc.BackupPBSMetricServers = cfg.BackupPBSMetricServers cc.BackupPBSTrafficControl = cfg.BackupPBSTrafficControl + cc.BackupPBSNotifications = cfg.BackupPBSNotifications + cc.BackupPBSNotificationsPriv = cfg.BackupPBSNotificationsPriv cc.BackupUserConfigs = cfg.BackupUserConfigs cc.BackupRemoteConfigs = cfg.BackupRemoteConfigs cc.BackupSyncJobs = cfg.BackupSyncJobs diff --git a/internal/orchestrator/pbs_api_apply.go b/internal/orchestrator/pbs_api_apply.go new file mode 100644 index 0000000..d5d233d --- /dev/null +++ b/internal/orchestrator/pbs_api_apply.go @@ -0,0 +1,799 @@ +package orchestrator + +import ( + "context" + "encoding/json" + "fmt" + "os" + "sort" + "strings" + "time" + + "github.com/tis24dev/proxsave/internal/config" + "github.com/tis24dev/proxsave/internal/logging" +) + +const ( + pbsApplyModeFile = "file" + pbsApplyModeAPI = "api" + pbsApplyModeAuto = "auto" +) + +func normalizePBSApplyMode(cfg *config.Config) string { + if cfg == nil { + return pbsApplyModeAuto + } + mode := strings.ToLower(strings.TrimSpace(cfg.RestorePBSApplyMode)) + switch mode { + case pbsApplyModeFile, pbsApplyModeAPI, pbsApplyModeAuto: + return mode + default: + return pbsApplyModeAuto + } +} + +func pbsStrictRestore(cfg *config.Config) bool { + return cfg != nil && cfg.RestorePBSStrict +} + +func normalizeProxmoxCfgKey(key string) string { + key = strings.ToLower(strings.TrimSpace(key)) + key = strings.ReplaceAll(key, "_", "-") + return key +} + +func buildProxmoxManagerFlags(entries []proxmoxNotificationEntry, skipKeys ...string) []string { + if len(entries) == 0 { + return nil + } + skip := make(map[string]struct{}, len(skipKeys)+2) + for _, k := range skipKeys { + skip[normalizeProxmoxCfgKey(k)] = struct{}{} + } + // Common no-op keys + skip["digest"] = struct{}{} + skip["name"] = struct{}{} + + args := make([]string, 0, len(entries)*2) + for _, kv := range entries { + key := normalizeProxmoxCfgKey(kv.Key) + if key == "" { + continue + } + if _, ok := skip[key]; ok { + continue + } + value := strings.TrimSpace(kv.Value) + args = append(args, "--"+key) + args = append(args, value) + } + return args +} + +func popEntryValue(entries []proxmoxNotificationEntry, keys ...string) (value string, remaining []proxmoxNotificationEntry, ok bool) { + if len(entries) == 0 || len(keys) == 0 { + return "", entries, false + } + want := make(map[string]struct{}, len(keys)) + for _, k := range keys { + want[normalizeProxmoxCfgKey(k)] = struct{}{} + } + + remaining = make([]proxmoxNotificationEntry, 
0, len(entries)) + for _, kv := range entries { + key := normalizeProxmoxCfgKey(kv.Key) + if _, match := want[key]; match && !ok { + value = strings.TrimSpace(kv.Value) + ok = true + continue + } + remaining = append(remaining, kv) + } + return value, remaining, ok +} + +func runPBSManagerRedacted(ctx context.Context, args []string, redactFlags []string, redactIndexes []int) ([]byte, error) { + out, err := restoreCmd.Run(ctx, "proxmox-backup-manager", args...) + if err == nil { + return out, nil + } + redacted := redactCLIArgs(args, redactFlags) + for _, idx := range redactIndexes { + if idx >= 0 && idx < len(redacted) { + redacted[idx] = "" + } + } + return out, fmt.Errorf("proxmox-backup-manager %s failed: %w", strings.Join(redacted, " "), err) +} + +func runPBSManager(ctx context.Context, args ...string) ([]byte, error) { + return runPBSManagerRedacted(ctx, args, nil, nil) +} + +func runPBSManagerSensitive(ctx context.Context, args []string, redactFlags ...string) ([]byte, error) { + return runPBSManagerRedacted(ctx, args, redactFlags, nil) +} + +func unwrapPBSJSONData(raw []byte) []byte { + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" { + return nil + } + var wrapper map[string]json.RawMessage + if err := json.Unmarshal([]byte(trimmed), &wrapper); err != nil { + return []byte(trimmed) + } + if data, ok := wrapper["data"]; ok && len(bytesTrimSpace(data)) > 0 { + return data + } + return []byte(trimmed) +} + +func bytesTrimSpace(b []byte) []byte { + return []byte(strings.TrimSpace(string(b))) +} + +func parsePBSListIDs(raw []byte, candidateKeys ...string) ([]string, error) { + data := unwrapPBSJSONData(raw) + if len(data) == 0 { + return nil, nil + } + + var rows []map[string]any + if err := json.Unmarshal(data, &rows); err != nil { + return nil, err + } + + out := make([]string, 0, len(rows)) + seen := make(map[string]struct{}, len(rows)) + for _, row := range rows { + id := "" + for _, k := range candidateKeys { + k = strings.TrimSpace(k) + if k == "" { + continue + } + if v, ok := row[k]; ok { + if s, ok := v.(string); ok { + id = strings.TrimSpace(s) + break + } + } + } + if id == "" { + for _, v := range row { + if s, ok := v.(string); ok { + id = strings.TrimSpace(s) + break + } + } + } + if id == "" { + continue + } + if _, ok := seen[id]; ok { + continue + } + seen[id] = struct{}{} + out = append(out, id) + } + sort.Strings(out) + return out, nil +} + +func ensurePBSServicesForAPI(ctx context.Context, logger *logging.Logger) error { + if logger == nil { + logger = logging.GetDefaultLogger() + } + + if !isRealRestoreFS(restoreFS) { + return fmt.Errorf("non-system filesystem in use") + } + if os.Geteuid() != 0 { + return fmt.Errorf("requires root privileges") + } + + if _, err := restoreCmd.Run(ctx, "proxmox-backup-manager", "version"); err != nil { + return fmt.Errorf("proxmox-backup-manager not available: %w", err) + } + + // Best-effort: ensure services are started before API apply. 
+ startCtx, cancel := context.WithTimeout(ctx, 2*serviceStartTimeout+serviceVerifyTimeout+5*time.Second) + defer cancel() + if err := startPBSServices(startCtx, logger); err != nil { + return err + } + return nil +} + +func applyPBSRemoteCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + remoteRaw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/remote.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(remoteRaw) + if err != nil { + return fmt.Errorf("parse staged remote.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + if strict { + out, err := runPBSManager(ctx, "remote", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse remote list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "remote", "remove", id); err != nil { + logger.Warning("PBS API apply: remote remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"remote", "create", id}, flags...) + if _, err := runPBSManagerSensitive(ctx, createArgs, "--password"); err != nil { + updateArgs := append([]string{"remote", "update", id}, flags...) + if _, upErr := runPBSManagerSensitive(ctx, updateArgs, "--password"); upErr != nil { + return fmt.Errorf("remote %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSS3CfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + s3Raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/s3.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(s3Raw) + if err != nil { + return fmt.Errorf("parse staged s3.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "s3", "endpoint", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse s3 endpoint list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "s3", "endpoint", "remove", id); err != nil { + logger.Warning("PBS API apply: s3 endpoint remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"s3", "endpoint", "create", id}, flags...) + if _, err := runPBSManagerSensitive(ctx, createArgs, "--access-key", "--secret-key"); err != nil { + updateArgs := append([]string{"s3", "endpoint", "update", id}, flags...) 
+ if _, upErr := runPBSManagerSensitive(ctx, updateArgs, "--access-key", "--secret-key"); upErr != nil { + return fmt.Errorf("s3 endpoint %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSDatastoreCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + dsRaw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/datastore.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(dsRaw) + if err != nil { + return fmt.Errorf("parse staged datastore.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + type dsRow struct { + Name string `json:"name"` + Store string `json:"store"` + ID string `json:"id"` + Path string `json:"path"` + } + currentPaths := make(map[string]string) + if out, err := runPBSManager(ctx, "datastore", "list", "--output-format=json"); err == nil { + var rows []dsRow + if err := json.Unmarshal(unwrapPBSJSONData(out), &rows); err == nil { + for _, row := range rows { + name := strings.TrimSpace(row.Name) + if name == "" { + name = strings.TrimSpace(row.Store) + } + if name == "" { + name = strings.TrimSpace(row.ID) + } + if name == "" { + continue + } + currentPaths[name] = strings.TrimSpace(row.Path) + } + } + } + + if strict { + current := make([]string, 0, len(currentPaths)) + for name := range currentPaths { + current = append(current, name) + } + sort.Strings(current) + for _, name := range current { + if _, ok := desired[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "datastore", "remove", name); err != nil { + logger.Warning("PBS API apply: datastore remove %s failed (continuing): %v", name, err) + } + } + } + + names := make([]string, 0, len(desired)) + for name := range desired { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + s := desired[name] + path, entries, ok := popEntryValue(s.Entries, "path") + if !ok || strings.TrimSpace(path) == "" { + logger.Warning("PBS API apply: datastore %s missing path; skipping", name) + continue + } + flags := buildProxmoxManagerFlags(entries) + if currentPath, exists := currentPaths[name]; exists { + if currentPath != "" && strings.TrimSpace(currentPath) != strings.TrimSpace(path) { + if strict { + if _, err := runPBSManager(ctx, "datastore", "remove", name); err != nil { + return fmt.Errorf("datastore %s: path mismatch (%s != %s) and remove failed: %w", name, currentPath, path, err) + } + createArgs := append([]string{"datastore", "create", name, path}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + return fmt.Errorf("datastore %s: recreate after path mismatch failed: %w", name, err) + } + continue + } + logger.Warning("PBS API apply: datastore %s path mismatch (%s != %s); leaving path unchanged (enable RESTORE_PBS_STRICT=true for 1:1)", name, currentPath, path) + } + + updateArgs := append([]string{"datastore", "update", name}, flags...) + if _, err := runPBSManager(ctx, updateArgs...); err != nil { + return fmt.Errorf("datastore %s: update failed: %w", name, err) + } + continue + } + + createArgs := append([]string{"datastore", "create", name, path}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"datastore", "update", name}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("datastore %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} + +func applyPBSSyncCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/sync.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged sync.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "sync-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse sync-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "sync-job", "remove", id); err != nil { + logger.Warning("PBS API apply: sync-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"sync-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"sync-job", "update", id}, flags...) + if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("sync-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSVerificationCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/verification.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged verification.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "verify-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse verify-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "verify-job", "remove", id); err != nil { + logger.Warning("PBS API apply: verify-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"verify-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"verify-job", "update", id}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("verify-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSPruneCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/prune.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged prune.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "prune-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse prune-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "prune-job", "remove", id); err != nil { + logger.Warning("PBS API apply: prune-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"prune-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"prune-job", "update", id}, flags...) + if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("prune-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSTrafficControlCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/traffic-control.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged traffic-control.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + if strict { + out, err := runPBSManager(ctx, "traffic-control", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse traffic-control list: %w", err) + } + for _, name := range current { + if _, ok := desired[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "traffic-control", "remove", name); err != nil { + logger.Warning("PBS API apply: traffic-control remove %s failed (continuing): %v", name, err) + } + } + } + + names := make([]string, 0, len(desired)) + for name := range desired { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + s := desired[name] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"traffic-control", "create", name}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"traffic-control", "update", name}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("traffic-control %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} + +func applyPBSNodeCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/node.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged node.cfg: %w", err) + } + if len(sections) == 0 { + return nil + } + // node update applies to the local node; use the first section. + flags := buildProxmoxManagerFlags(sections[0].Entries) + args := append([]string{"node", "update"}, flags...) + if _, err := runPBSManager(ctx, args...); err != nil { + return err + } + return nil +} + +func applyPBSCategoriesViaAPI(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string) error { + if plan == nil || plan.SystemType != SystemTypePBS { + return nil + } + mode := normalizePBSApplyMode(cfg) + if mode == pbsApplyModeFile { + return nil + } + + strict := pbsStrictRestore(cfg) + + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply unavailable; falling back to file-based apply: %v", err) + return nil + } + return err + } + + // Apply in dependency-safe order. + if plan.HasCategoryID("pbs_host") { + if err := applyPBSNodeCfgViaAPI(ctx, logger, stageRoot); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: node.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: traffic-control.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + } + + if plan.HasCategoryID("datastore_pbs") { + if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: s3.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: datastore.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + } + + if plan.HasCategoryID("pbs_remotes") { + if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: remote.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + } + + if plan.HasCategoryID("pbs_jobs") { + if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: sync.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: verification.cfg failed (continuing with file-based apply): %v", err) + } else { + return err + } + } + if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: prune.cfg failed (continuing with 
file-based apply): %v", err) + } else { + return err + } + } + } + + return nil +} diff --git a/internal/orchestrator/pbs_notifications_api_apply.go b/internal/orchestrator/pbs_notifications_api_apply.go new file mode 100644 index 0000000..122d659 --- /dev/null +++ b/internal/orchestrator/pbs_notifications_api_apply.go @@ -0,0 +1,242 @@ +package orchestrator + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/tis24dev/proxsave/internal/logging" +) + +func applyPBSNotificationsViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + cfgRaw, cfgPresent, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/notifications.cfg") + if err != nil { + return err + } + if !cfgPresent { + return nil + } + privRaw, _, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/notifications-priv.cfg") + if err != nil { + return err + } + + cfgSections, err := parseProxmoxNotificationSections(cfgRaw) + if err != nil { + return fmt.Errorf("parse staged notifications.cfg: %w", err) + } + privSections, err := parseProxmoxNotificationSections(privRaw) + if err != nil { + return fmt.Errorf("parse staged notifications-priv.cfg: %w", err) + } + + privByKey := make(map[string][]proxmoxNotificationEntry) + privRedactFlagsByKey := make(map[string][]string) + for _, s := range privSections { + if strings.TrimSpace(s.Type) == "" || strings.TrimSpace(s.Name) == "" { + continue + } + key := fmt.Sprintf("%s:%s", strings.TrimSpace(s.Type), strings.TrimSpace(s.Name)) + privByKey[key] = append([]proxmoxNotificationEntry{}, s.Entries...) + privRedactFlagsByKey[key] = append([]string(nil), notificationRedactFlagsFromEntries(s.Entries)...) + } + + type endpointSection struct { + section proxmoxNotificationSection + redactFlags []string + redactIndex []int + positional []string + sectionKey string + endpointType string + } + + var endpoints []endpointSection + var matchers []proxmoxNotificationSection + + for _, s := range cfgSections { + typ := strings.TrimSpace(s.Type) + name := strings.TrimSpace(s.Name) + if typ == "" || name == "" { + continue + } + switch typ { + case "smtp", "sendmail", "gotify", "webhook": + key := fmt.Sprintf("%s:%s", typ, name) + if priv, ok := privByKey[key]; ok && len(priv) > 0 { + s.Entries = append(s.Entries, priv...) + } + redactFlags := notificationRedactFlags(s) + if extra := privRedactFlagsByKey[key]; len(extra) > 0 { + redactFlags = append(redactFlags, extra...) 
+ } + + pos := []string{} + entries := s.Entries + + switch typ { + case "smtp": + recipients, remaining, ok := popEntryValue(entries, "recipients", "mailto", "mail-to") + if !ok || strings.TrimSpace(recipients) == "" { + logger.Warning("PBS notifications API apply: smtp endpoint %s missing recipients; skipping", name) + continue + } + pos = append(pos, recipients) + s.Entries = remaining + case "sendmail": + mailto, remaining, ok := popEntryValue(entries, "mailto", "mail-to", "recipients") + if !ok || strings.TrimSpace(mailto) == "" { + logger.Warning("PBS notifications API apply: sendmail endpoint %s missing mailto; skipping", name) + continue + } + pos = append(pos, mailto) + s.Entries = remaining + case "gotify": + server, remaining, ok := popEntryValue(entries, "server") + if !ok || strings.TrimSpace(server) == "" { + logger.Warning("PBS notifications API apply: gotify endpoint %s missing server; skipping", name) + continue + } + token, remaining2, ok := popEntryValue(remaining, "token") + if !ok || strings.TrimSpace(token) == "" { + logger.Warning("PBS notifications API apply: gotify endpoint %s missing token; skipping", name) + continue + } + pos = append(pos, server, token) + s.Entries = remaining2 + case "webhook": + url, remaining, ok := popEntryValue(entries, "url") + if !ok || strings.TrimSpace(url) == "" { + logger.Warning("PBS notifications API apply: webhook endpoint %s missing url; skipping", name) + continue + } + pos = append(pos, url) + s.Entries = remaining + } + + redactIndex := []int(nil) + if typ == "gotify" { + // proxmox-backup-manager notification endpoint gotify create/update + redactIndex = []int{6} + } + + endpoints = append(endpoints, endpointSection{ + section: s, + redactFlags: redactFlags, + redactIndex: redactIndex, + positional: pos, + sectionKey: key, + endpointType: typ, + }) + case "matcher": + matchers = append(matchers, s) + default: + logger.Warning("PBS notifications API apply: unknown section %q (%s); skipping", typ, name) + } + } + + // Endpoints first (matchers refer to targets/endpoints). + for _, typ := range []string{"smtp", "sendmail", "gotify", "webhook"} { + desiredNames := make(map[string]endpointSection) + for _, e := range endpoints { + if e.endpointType != typ { + continue + } + name := strings.TrimSpace(e.section.Name) + if name == "" { + continue + } + desiredNames[name] = e + } + + names := make([]string, 0, len(desiredNames)) + for name := range desiredNames { + names = append(names, name) + } + sort.Strings(names) + + if strict { + out, err := runPBSManager(ctx, "notification", "endpoint", typ, "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse endpoint list (%s): %w", typ, err) + } + for _, name := range current { + if _, ok := desiredNames[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "notification", "endpoint", typ, "remove", name); err != nil { + // Built-in endpoints may not be removable; keep going. + logger.Warning("PBS notifications API apply: endpoint remove %s:%s failed (continuing): %v", typ, name, err) + } + } + } + + for _, name := range names { + e := desiredNames[name] + flags := buildProxmoxManagerFlags(e.section.Entries) + createArgs := append([]string{"notification", "endpoint", typ, "create", name}, e.positional...) + createArgs = append(createArgs, flags...) 
+ if _, err := runPBSManagerRedacted(ctx, createArgs, e.redactFlags, e.redactIndex); err != nil { + updateArgs := append([]string{"notification", "endpoint", typ, "update", name}, e.positional...) + updateArgs = append(updateArgs, flags...) + if _, upErr := runPBSManagerRedacted(ctx, updateArgs, e.redactFlags, e.redactIndex); upErr != nil { + return fmt.Errorf("endpoint %s:%s: %v (create) / %v (update)", typ, name, err, upErr) + } + } + } + } + + // Then matchers. + desiredMatchers := make(map[string]proxmoxNotificationSection, len(matchers)) + for _, m := range matchers { + name := strings.TrimSpace(m.Name) + if name == "" { + continue + } + desiredMatchers[name] = m + } + + matcherNames := make([]string, 0, len(desiredMatchers)) + for name := range desiredMatchers { + matcherNames = append(matcherNames, name) + } + sort.Strings(matcherNames) + + if strict { + out, err := runPBSManager(ctx, "notification", "matcher", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse matcher list: %w", err) + } + for _, name := range current { + if _, ok := desiredMatchers[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "notification", "matcher", "remove", name); err != nil { + // Built-in matchers may not be removable; keep going. + logger.Warning("PBS notifications API apply: matcher remove %s failed (continuing): %v", name, err) + } + } + } + + for _, name := range matcherNames { + m := desiredMatchers[name] + flags := buildProxmoxManagerFlags(m.Entries) + createArgs := append([]string{"notification", "matcher", "create", name}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"notification", "matcher", "update", name}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("matcher %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} diff --git a/internal/orchestrator/pbs_notifications_api_apply_test.go b/internal/orchestrator/pbs_notifications_api_apply_test.go new file mode 100644 index 0000000..51a0ca7 --- /dev/null +++ b/internal/orchestrator/pbs_notifications_api_apply_test.go @@ -0,0 +1,63 @@ +package orchestrator + +import ( + "context" + "os" + "reflect" + "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestApplyPBSNotificationsViaAPI_CreatesEndpointAndMatcher(t *testing.T) { + origCmd := restoreCmd + origFS := restoreFS + t.Cleanup(func() { + restoreCmd = origCmd + restoreFS = origFS + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + stageRoot := "/stage" + + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/notifications.cfg", []byte( + "smtp: Gmail-relay\n"+ + " recipients user@example.com\n"+ + " from-address pbs@example.com\n"+ + " server smtp.gmail.com\n"+ + " port 587\n"+ + " username user\n"+ + "\n"+ + "matcher: default-matcher\n"+ + " target Gmail-relay\n", + ), 0o640); err != nil { + t.Fatalf("write staged notifications.cfg: %v", err) + } + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/notifications-priv.cfg", []byte( + "smtp: Gmail-relay\n"+ + " password secret123\n", + ), 0o600); err != nil { + t.Fatalf("write staged notifications-priv.cfg: %v", err) + } + + runner := &fakeCommandRunner{} + restoreCmd = runner + + logger := logging.New(types.LogLevelDebug, false) + if err := applyPBSNotificationsViaAPI(context.Background(), logger, stageRoot, false); err != nil { + t.Fatalf("applyPBSNotificationsViaAPI error: %v", err) + } + + want := []string{ + "proxmox-backup-manager notification endpoint smtp create Gmail-relay user@example.com --from-address pbs@example.com --server smtp.gmail.com --port 587 --username user --password secret123", + "proxmox-backup-manager notification matcher create default-matcher --target Gmail-relay", + } + if !reflect.DeepEqual(runner.calls, want) { + t.Fatalf("calls=%v want %v", runner.calls, want) + } +} + diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index e434cb7..1cda623 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -9,10 +9,11 @@ import ( "path/filepath" "strings" + "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/logging" ) -func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { +func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string, dryRun bool) (err error) { if plan == nil || plan.SystemType != SystemTypePBS { return nil } @@ -40,34 +41,148 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, return nil } - if plan.HasCategoryID("datastore_pbs") { - if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: s3.cfg: %v", err) + mode := normalizePBSApplyMode(cfg) + strict := pbsStrictRestore(cfg) + + needsAPI := mode != pbsApplyModeFile && (plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || 
plan.HasCategoryID("pbs_jobs")) + if needsAPI { + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply unavailable; falling back to file-based staged apply: %v", err) + mode = pbsApplyModeFile + } else { + return err + } } - if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: datastore.cfg: %v", err) + } + + if plan.HasCategoryID("pbs_host") { + // Always restore file-only configs (no stable API coverage yet). + // ACME should be applied before node config (node.cfg references ACME accounts/plugins). + for _, rel := range []string{ + "etc/proxmox-backup/acme/accounts.cfg", + "etc/proxmox-backup/acme/plugins.cfg", + "etc/proxmox-backup/metricserver.cfg", + "etc/proxmox-backup/proxy.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } + + if mode != pbsApplyModeFile { + if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: traffic-control failed; falling back to file-based: %v", err) + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") + } else { + return err + } + } + if err := applyPBSNodeCfgViaAPI(ctx, logger, stageRoot); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: node config failed; falling back to file-based: %v", err) + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") + } else { + return err + } + } + } else { + for _, rel := range []string{ + "etc/proxmox-backup/traffic-control.cfg", + "etc/proxmox-backup/node.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } } } - if plan.HasCategoryID("pbs_jobs") { - if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: job configs: %v", err) + + if plan.HasCategoryID("datastore_pbs") { + if mode != pbsApplyModeFile { + if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: s3.cfg failed; falling back to file-based: %v", err) + _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) + } else { + return err + } + } + if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: datastore.cfg failed; falling back to file-based: %v", err) + _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) + } else { + return err + } + } + } else { + if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: s3.cfg: %v", err) + } + if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: datastore.cfg: %v", err) + } } } + if plan.HasCategoryID("pbs_remotes") { - if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: remote.cfg: %v", err) + if mode != pbsApplyModeFile { + if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: remote.cfg failed; falling back to file-based: %v", err) + _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) + 
} else { + return err + } + } + } else { + if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: remote.cfg: %v", err) + } } } - if plan.HasCategoryID("pbs_host") { - if err := applyPBSHostConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: host configs: %v", err) + + if plan.HasCategoryID("pbs_jobs") { + if mode != pbsApplyModeFile { + if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: sync jobs failed; falling back to file-based: %v", err) + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } else { + return err + } + } + if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: verification jobs failed; falling back to file-based: %v", err) + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } else { + return err + } + } + if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS API apply: prune jobs failed; falling back to file-based: %v", err) + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } else { + return err + } + } + } else { + if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: job configs: %v", err) + } } } + if plan.HasCategoryID("pbs_tape") { if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: tape configs: %v", err) } } + return nil } diff --git a/internal/orchestrator/restore_notifications.go b/internal/orchestrator/restore_notifications.go index 6181c09..16c1acd 100644 --- a/internal/orchestrator/restore_notifications.go +++ b/internal/orchestrator/restore_notifications.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" + "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/logging" ) @@ -23,7 +24,7 @@ type proxmoxNotificationSection struct { RedactFlags []string } -func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { +func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string, dryRun bool) (err error) { if plan == nil { return nil } @@ -56,7 +57,27 @@ func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logge if !plan.HasCategoryID("pbs_notifications") { return nil } - return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + mode := normalizePBSApplyMode(cfg) + strict := pbsStrictRestore(cfg) + if mode == pbsApplyModeFile { + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS notifications API apply unavailable; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + return err + } + if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { + if mode == pbsApplyModeAuto { + logger.Warning("PBS notifications API apply failed; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + return err + } + logger.Info("PBS notifications applied via API") + return nil case 
SystemTypePVE: if !plan.HasCategoryID("pve_notifications") { return nil diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index 871ad9b..b691bcc 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -508,12 +508,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } logger.Info("") - if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err - } - restoreHadWarnings = true - logger.Warning("PBS staged config apply: %v", err) + if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, cfg, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("PBS staged config apply: %v", err) } if err := maybeApplyPVEConfigsFromStage(ctx, logger, plan, stageRoot, destRoot, cfg.DryRun); err != nil { if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { @@ -559,12 +559,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Warning("Access control staged apply: %v", err) } } - if err := maybeApplyNotificationsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err - } - restoreHadWarnings = true - logger.Warning("Notifications staged apply: %v", err) + if err := maybeApplyNotificationsFromStage(ctx, logger, plan, cfg, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("Notifications staged apply: %v", err) } } From 0502ef1d19e2ff7b9eda9908dfd868ecbb585cd2 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 00:45:36 +0100 Subject: [PATCH 03/24] Make PBS restore behavior interactive (UI-driven) Move PBS restore reconciliation out of backup.env and into an interactive choice during restores. Introduces a PBSRestoreBehavior enum (Merge vs Clean 1:1) and threads it through RestorePlan; removes RESTORE_PBS_APPLY_MODE/RESTORE_PBS_STRICT config parsing and env template entries. Staged PBS apply logic now uses the selected behavior: API-based applies are preferred and file-based fallbacks are only allowed in Clean mode, while Merge mode skips destructive/API-unavailable actions. Updated CLI and TUI to prompt for the behavior, adjusted notification/apply helpers, tests, and documentation to reflect the interactive selection and new default apply semantics. 
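
For reviewers, a minimal sketch of the shape this selection takes. The identifiers below (the Merge/Clean constants and the strict() helper) are illustrative assumptions; the actual definitions live in internal/orchestrator/pbs_restore_behavior.go and may differ.

```go
package orchestrator

// PBSRestoreBehavior selects how staged PBS categories are reconciled on restore.
// Illustrative sketch only; see pbs_restore_behavior.go for the real definition.
type PBSRestoreBehavior int

const (
	// Merge: create/update supported objects via proxmox-backup-manager,
	// never delete objects that exist on the host but not in the backup.
	PBSRestoreBehaviorMerge PBSRestoreBehavior = iota
	// Clean 1:1: reconcile to match the backup exactly; deletions are allowed
	// and file-based staged apply is permitted as a fallback.
	PBSRestoreBehaviorClean
)

// strict reports whether 1:1 reconciliation (with deletions) is requested.
func (b PBSRestoreBehavior) strict() bool { return b == PBSRestoreBehaviorClean }
```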
--- docs/BACKUP_ENV_MAPPING.md | 3 +- docs/CLI_REFERENCE.md | 8 +- docs/CONFIGURATION.md | 35 +----- docs/RESTORE_DIAGRAMS.md | 2 +- docs/RESTORE_GUIDE.md | 12 +- docs/RESTORE_TECHNICAL.md | 13 +- internal/config/config.go | 88 +++++-------- internal/config/templates/backup.env | 11 -- internal/orchestrator/pbs_api_apply.go | 117 +----------------- internal/orchestrator/pbs_restore_behavior.go | 33 +++++ internal/orchestrator/pbs_staged_apply.go | 100 +++++++-------- .../orchestrator/restore_notifications.go | 25 ++-- internal/orchestrator/restore_plan.go | 2 +- internal/orchestrator/restore_tui.go | 84 +++++++++++++ internal/orchestrator/restore_workflow_ui.go | 40 ++++-- .../restore_workflow_ui_helpers_test.go | 70 ++++++----- internal/orchestrator/workflow_ui.go | 1 + internal/orchestrator/workflow_ui_cli.go | 26 ++++ .../orchestrator/workflow_ui_tui_restore.go | 5 +- 19 files changed, 342 insertions(+), 333 deletions(-) create mode 100644 internal/orchestrator/pbs_restore_behavior.go diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index aefe46a..db10996 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -88,8 +88,7 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. -RESTORE_PBS_APPLY_MODE = NEW (Go-only) → Restore: apply staged PBS configuration using `file`, `api`, or `auto` (default: `auto`). -RESTORE_PBS_STRICT = NEW (Go-only) → Restore: when API apply is used, remove PBS objects not present in the backup (1:1 reconciliation; destructive). +NOTE: PBS restore behavior is selected interactively during `--restore` and is intentionally not configured via `backup.env`. BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). BACKUP_PBS_ACME_ACCOUNTS = NEW (Go-only) → Collect `acme/accounts.cfg` and ACME account snapshots (PBS). diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index 75c0374..eae8335 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -789,12 +789,8 @@ CONFIG_FILE=/etc/pbs/prod.env ./build/proxsave # Force dry-run mode DRY_RUN=true ./build/proxsave -# PBS restore behavior (optional) -# Prefer API-based apply for PBS staged categories (falls back when RESTORE_PBS_APPLY_MODE=auto) -RESTORE_PBS_APPLY_MODE=api ./build/proxsave --restore - -# Strict 1:1 reconciliation for PBS (WARNING: destructive) -RESTORE_PBS_STRICT=true RESTORE_PBS_APPLY_MODE=api ./build/proxsave --restore +# PBS restore behavior +# Selected interactively during `--restore` on PBS hosts (Merge vs Clean 1:1). # Set debug level DEBUG_LEVEL=extreme ./build/proxsave --log-level debug diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 6f09263..6603768 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -77,44 +77,21 @@ PROFILING_ENABLED=true # true | false (profiles written under LOG_PA ## Restore (PBS) -These options affect **restore behavior on PBS hosts only**. +PBS restore behavior is chosen **interactively at restore time** on PBS hosts (not via `backup.env`). 
-```bash -# How to apply PBS configuration during restore: -# - file: restore staged *.cfg files to /etc/proxmox-backup (legacy behavior) -# - api: apply via proxmox-backup-manager where possible -# - auto: prefer API; fall back to file-based apply on failures -RESTORE_PBS_APPLY_MODE=auto # file | api | auto - -# When true, remove PBS objects not present in the backup (1:1 reconciliation). -# WARNING: Destructive when used with api/auto (it may delete existing objects). -RESTORE_PBS_STRICT=false # true | false -``` +You will be asked to choose a behavior: +- **Merge (existing PBS)**: intended for restoring onto an already operational PBS; ProxSave applies supported PBS categories via `proxmox-backup-manager` without deleting existing objects that are not in the backup. +- **Clean 1:1 (fresh PBS install)**: intended for restoring onto a new, clean PBS; ProxSave attempts to make supported PBS objects match the backup (may remove objects that exist on the system but are not in the backup). -### RESTORE_PBS_APPLY_MODE +ProxSave applies supported PBS staged categories via API automatically (and may fall back to file-based staged apply only in **Clean 1:1** mode). -- `file`: Always restores by applying staged files under `/tmp/proxsave/restore-stage-*` back to `/etc/proxmox-backup`. -- `api`: Prefers **API-based apply** via `proxmox-backup-manager` (fails if the API apply is unavailable or errors). -- `auto` (default): Tries `api` first and falls back to `file` on failures (service start failures, missing `proxmox-backup-manager`, command errors). - -**Current API coverage** (when `api`/`auto`): +**Current API coverage**: - Node + traffic control (`pbs_host`) - Datastores + S3 endpoints (`datastore_pbs`) - Remotes (`pbs_remotes`) - Jobs (sync/verify/prune) (`pbs_jobs`) - Notifications endpoints/matchers (`pbs_notifications`) -Configs with file-only apply remain file-based (e.g. access control, tape, proxy/ACME/metricserver). - -### RESTORE_PBS_STRICT (1:1) - -When `true` and **API apply** is used, ProxSave attempts a 1:1 reconciliation by removing objects that exist on the restore host but are **not present in the backup** (for the supported PBS categories above). - -Use cases: -- Disaster recovery or rebuild where the goal is **restore 1:1** on a clean PBS install. - -Avoid enabling `RESTORE_PBS_STRICT=true` for migrations or partial restores unless you explicitly want ProxSave to delete existing PBS objects. - --- ## Security Settings diff --git a/docs/RESTORE_DIAGRAMS.md b/docs/RESTORE_DIAGRAMS.md index 674db02..3dca90a 100644 --- a/docs/RESTORE_DIAGRAMS.md +++ b/docs/RESTORE_DIAGRAMS.md @@ -138,7 +138,7 @@ flowchart TD style CheckboxMenu fill:#87CEEB ``` -**Note (PBS)**: Staged PBS categories can be applied either by writing staged `*.cfg` files back to `/etc/proxmox-backup` or via `proxmox-backup-manager`, depending on `RESTORE_PBS_APPLY_MODE`. +**Note (PBS)**: ProxSave applies supported PBS staged categories via `proxmox-backup-manager` by default. In **Clean 1:1** mode it may fall back to writing staged `*.cfg` files back to `/etc/proxmox-backup` when API apply is unavailable or fails. --- diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 4cfdc3e..bac43c2 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -107,7 +107,11 @@ Each category is handled in one of three ways: ### PBS-Specific Categories (9 categories) -**PBS staged apply mode**: For staged PBS categories, the apply method is controlled by `RESTORE_PBS_APPLY_MODE` (`auto` by default). 
When using `api`/`auto`, ProxSave applies supported PBS categories via `proxmox-backup-manager` and can optionally do strict 1:1 reconciliation with `RESTORE_PBS_STRICT=true`. +**PBS staged apply behavior**: During restore on PBS, ProxSave prompts you to choose how to reconcile PBS objects: +- **Merge (existing PBS)**: intended for restoring onto an already operational PBS; applies supported PBS categories via `proxmox-backup-manager` without deleting existing objects that are not in the backup. +- **Clean 1:1 (fresh PBS install)**: intended for restoring onto a new, clean PBS; attempts to make supported PBS objects match the backup (may remove objects not in the backup). + +API apply is automatic for supported PBS staged categories; ProxSave may fall back to file-based staged apply only in **Clean 1:1** mode. | Category | Name | Description | Paths | |----------|------|-------------|-------| @@ -227,10 +231,10 @@ Select restore mode: - `zfs` - ZFS configuration **PBS Categories**: -- `datastore_pbs` - Datastore definitions (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) +- `datastore_pbs` - Datastore definitions (staged apply; API preferred, file fallback in Clean 1:1) - `maintenance_pbs` - Maintenance settings -- `pbs_jobs` - Sync/verify/prune jobs (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) -- `pbs_remotes` - Remotes for sync jobs (staged apply; API/file controlled by `RESTORE_PBS_APPLY_MODE`) +- `pbs_jobs` - Sync/verify/prune jobs (staged apply; API preferred, file fallback in Clean 1:1) +- `pbs_remotes` - Remotes for sync jobs (staged apply; API preferred, file fallback in Clean 1:1) - `filesystem` - /etc/fstab - `storage_stack` - Storage stack config (mount prerequisites) - `zfs` - ZFS configuration diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index 1192f67..2bd5c02 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -874,12 +874,13 @@ func extractSelectiveArchive( After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*`. **PBS staged apply**: -- Controlled by `RESTORE_PBS_APPLY_MODE` (`file` | `api` | `auto`) and `RESTORE_PBS_STRICT`. -- `file`: applies the staged `*.cfg` files back to `/etc/proxmox-backup` (legacy behavior). -- `api`: applies supported PBS categories via `proxmox-backup-manager` (create/update/remove, with optional strict 1:1 reconciliation). -- `auto` (default): prefers `api`, falls back to `file` on failures (e.g. services cannot be started, missing CLI binary, command errors). +- Selected interactively during restore on PBS hosts: **Merge (existing PBS)** vs **Clean 1:1 (fresh PBS install)**. +- ProxSave applies supported PBS categories via `proxmox-backup-manager`. + - **Merge**: create/update only (no deletions of existing objects not in the backup). + - **Clean 1:1**: attempts 1:1 reconciliation (may remove objects not present in the backup). +- If API apply is unavailable or fails, ProxSave may fall back to applying staged `*.cfg` files back to `/etc/proxmox-backup` (**Clean 1:1 only**). 
-**Current PBS API coverage** (when `api`/`auto`): +**Current PBS API coverage**: - `pbs_host`: node + traffic control - `datastore_pbs`: datastores + S3 endpoints - `pbs_remotes`: remotes @@ -1096,7 +1097,7 @@ func shouldStopPBSServices(categories []Category) bool { } ``` -**API apply note**: When `RESTORE_PBS_APPLY_MODE` is `api`/`auto`, ProxSave may start PBS services again during the **staged apply** phase to run `proxmox-backup-manager` commands (even if services were stopped earlier for safe file extraction). +**API apply note**: When ProxSave applies PBS staged categories via API (`proxmox-backup-manager`), it may start PBS services again during the **staged apply** phase (even if services were stopped earlier for safe file extraction). ### Error Handling Philosophy diff --git a/internal/config/config.go b/internal/config/config.go index 7f07dad..50094b1 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -199,32 +199,32 @@ type Config struct { CephConfigPath string // PBS-specific collection options - BackupDatastoreConfigs bool - BackupPBSS3Endpoints bool - BackupPBSNodeConfig bool - BackupPBSAcmeAccounts bool - BackupPBSAcmePlugins bool - BackupPBSMetricServers bool - BackupPBSTrafficControl bool - BackupPBSNotifications bool + BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool + BackupPBSNotifications bool BackupPBSNotificationsPriv bool - BackupUserConfigs bool - BackupRemoteConfigs bool - BackupSyncJobs bool - BackupVerificationJobs bool - BackupTapeConfigs bool - BackupPBSNetworkConfig bool - BackupPruneSchedules bool - BackupPxarFiles bool - PxarDatastoreConcurrency int - PxarIntraConcurrency int - PxarScanFanoutLevel int - PxarScanMaxRoots int - PxarStopOnCap bool - PxarEnumWorkers int - PxarEnumBudgetMs int - PxarFileIncludePatterns []string - PxarFileExcludePatterns []string + BackupUserConfigs bool + BackupRemoteConfigs bool + BackupSyncJobs bool + BackupVerificationJobs bool + BackupTapeConfigs bool + BackupPBSNetworkConfig bool + BackupPruneSchedules bool + BackupPxarFiles bool + PxarDatastoreConcurrency int + PxarIntraConcurrency int + PxarScanFanoutLevel int + PxarScanMaxRoots int + PxarStopOnCap bool + PxarEnumWorkers int + PxarEnumBudgetMs int + PxarFileIncludePatterns []string + PxarFileExcludePatterns []string // System collection options BackupNetworkConfigs bool @@ -259,15 +259,6 @@ type Config struct { PBSPassword string // Auto-detected API token secret PBSFingerprint string // Auto-detected from PBS certificate - // Restore settings - // RestorePBSApplyMode controls how PBS config is applied during restore: - // - "file": write staged *.cfg files to /etc/proxmox-backup (legacy behavior) - // - "api": apply via proxmox-backup-manager / proxmox-tape where possible - // - "auto": prefer API; fall back to file-based apply on failures - RestorePBSApplyMode string - // RestorePBSStrict enables 1:1 reconciliation for PBS categories (remove items not present in backup). 
- RestorePBSStrict bool - // raw configuration map raw map[string]string } @@ -303,15 +294,14 @@ func LoadConfig(configPath string) (*Config, error) { // This allows environment variables to take precedence over file configuration func (c *Config) loadEnvOverrides() { // List of all configuration keys that can be overridden by environment variables - envKeys := []string{ - "BACKUP_ENABLED", "DRY_RUN", "DEBUG_LEVEL", "USE_COLOR", "COLORIZE_STEP_LOGS", - "PROFILING_ENABLED", - "RESTORE_PBS_APPLY_MODE", "RESTORE_PBS_STRICT", - "COMPRESSION_TYPE", "COMPRESSION_LEVEL", "COMPRESSION_THREADS", "COMPRESSION_MODE", - "ENABLE_SMART_CHUNKING", "ENABLE_DEDUPLICATION", "ENABLE_PREFILTER", - "CHUNK_SIZE_MB", "CHUNK_THRESHOLD_MB", "PREFILTER_MAX_FILE_SIZE_MB", - "BACKUP_PATH", "LOG_PATH", "LOCK_PATH", "SECURE_ACCOUNT", - "SECONDARY_ENABLED", "SECONDARY_PATH", "SECONDARY_LOG_PATH", + envKeys := []string{ + "BACKUP_ENABLED", "DRY_RUN", "DEBUG_LEVEL", "USE_COLOR", "COLORIZE_STEP_LOGS", + "PROFILING_ENABLED", + "COMPRESSION_TYPE", "COMPRESSION_LEVEL", "COMPRESSION_THREADS", "COMPRESSION_MODE", + "ENABLE_SMART_CHUNKING", "ENABLE_DEDUPLICATION", "ENABLE_PREFILTER", + "CHUNK_SIZE_MB", "CHUNK_THRESHOLD_MB", "PREFILTER_MAX_FILE_SIZE_MB", + "BACKUP_PATH", "LOG_PATH", "LOCK_PATH", "SECURE_ACCOUNT", + "SECONDARY_ENABLED", "SECONDARY_PATH", "SECONDARY_LOG_PATH", "CLOUD_ENABLED", "CLOUD_REMOTE", "CLOUD_REMOTE_PATH", "CLOUD_LOG_PATH", "CLOUD_UPLOAD_MODE", "CLOUD_PARALLEL_MAX_JOBS", "CLOUD_PARALLEL_VERIFICATION", "CLOUD_WRITE_HEALTHCHECK", @@ -359,7 +349,6 @@ func (c *Config) parse() error { if err := c.parseCollectionSettings(); err != nil { return err } - c.parseRestoreSettings() c.autoDetectPBSAuth() return nil } @@ -378,17 +367,6 @@ func (c *Config) parseGeneralSettings() { c.ColorizeStepLogs = c.getBool("COLORIZE_STEP_LOGS", true) && c.UseColor } -func (c *Config) parseRestoreSettings() { - mode := strings.ToLower(strings.TrimSpace(c.getString("RESTORE_PBS_APPLY_MODE", "auto"))) - switch mode { - case "file", "api", "auto": - default: - mode = "auto" - } - c.RestorePBSApplyMode = mode - c.RestorePBSStrict = c.getBool("RESTORE_PBS_STRICT", false) -} - func (c *Config) parseCompressionSettings() { c.CompressionType = normalizeCompressionType(c.getCompressionType("COMPRESSION_TYPE", types.CompressionXZ)) c.CompressionLevel = c.getInt("COMPRESSION_LEVEL", 6) diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 0c45e3e..01244a2 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -14,17 +14,6 @@ COLORIZE_STEP_LOGS=true # Highlight "Step N/8" lines (requires USE_COLOR=tr DEBUG_LEVEL=standard # standard | advanced | extreme DRY_RUN=false # Set to false for real runs -# ---------------------------------------------------------------------- -# Restore (PBS) -# ---------------------------------------------------------------------- -# How to apply PBS configuration during restore: -# - file: restore staged *.cfg files to /etc/proxmox-backup (legacy behavior) -# - api: apply via proxmox-backup-manager / proxmox-tape where possible -# - auto: prefer API; fall back to file-based apply on failures -RESTORE_PBS_APPLY_MODE=auto # file | api | auto -# When true, remove PBS objects not present in the backup (1:1 reconciliation). 
-RESTORE_PBS_STRICT=false - # ---------------------------------------------------------------------- # Security # ---------------------------------------------------------------------- diff --git a/internal/orchestrator/pbs_api_apply.go b/internal/orchestrator/pbs_api_apply.go index d5d233d..00961d3 100644 --- a/internal/orchestrator/pbs_api_apply.go +++ b/internal/orchestrator/pbs_api_apply.go @@ -9,33 +9,9 @@ import ( "strings" "time" - "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/logging" ) -const ( - pbsApplyModeFile = "file" - pbsApplyModeAPI = "api" - pbsApplyModeAuto = "auto" -) - -func normalizePBSApplyMode(cfg *config.Config) string { - if cfg == nil { - return pbsApplyModeAuto - } - mode := strings.ToLower(strings.TrimSpace(cfg.RestorePBSApplyMode)) - switch mode { - case pbsApplyModeFile, pbsApplyModeAPI, pbsApplyModeAuto: - return mode - default: - return pbsApplyModeAuto - } -} - -func pbsStrictRestore(cfg *config.Config) bool { - return cfg != nil && cfg.RestorePBSStrict -} - func normalizeProxmoxCfgKey(key string) string { key = strings.ToLower(strings.TrimSpace(key)) key = strings.ReplaceAll(key, "_", "-") @@ -417,7 +393,7 @@ func applyPBSDatastoreCfgViaAPI(ctx context.Context, logger *logging.Logger, sta } continue } - logger.Warning("PBS API apply: datastore %s path mismatch (%s != %s); leaving path unchanged (enable RESTORE_PBS_STRICT=true for 1:1)", name, currentPath, path) + logger.Warning("PBS API apply: datastore %s path mismatch (%s != %s); leaving path unchanged (use Clean 1:1 restore to enforce 1:1)", name, currentPath, path) } updateArgs := append([]string{"datastore", "update", name}, flags...) @@ -706,94 +682,3 @@ func applyPBSNodeCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoo } return nil } - -func applyPBSCategoriesViaAPI(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string) error { - if plan == nil || plan.SystemType != SystemTypePBS { - return nil - } - mode := normalizePBSApplyMode(cfg) - if mode == pbsApplyModeFile { - return nil - } - - strict := pbsStrictRestore(cfg) - - if err := ensurePBSServicesForAPI(ctx, logger); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply unavailable; falling back to file-based apply: %v", err) - return nil - } - return err - } - - // Apply in dependency-safe order. 
- if plan.HasCategoryID("pbs_host") { - if err := applyPBSNodeCfgViaAPI(ctx, logger, stageRoot); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: node.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: traffic-control.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - } - - if plan.HasCategoryID("datastore_pbs") { - if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: s3.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: datastore.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - } - - if plan.HasCategoryID("pbs_remotes") { - if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: remote.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - } - - if plan.HasCategoryID("pbs_jobs") { - if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: sync.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: verification.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: prune.cfg failed (continuing with file-based apply): %v", err) - } else { - return err - } - } - } - - return nil -} diff --git a/internal/orchestrator/pbs_restore_behavior.go b/internal/orchestrator/pbs_restore_behavior.go new file mode 100644 index 0000000..a55b57d --- /dev/null +++ b/internal/orchestrator/pbs_restore_behavior.go @@ -0,0 +1,33 @@ +package orchestrator + +// PBSRestoreBehavior controls how PBS objects are reconciled during staged apply. +// It is intentionally chosen at restore time (UI), not via backup.env. 
+type PBSRestoreBehavior int + +const ( + PBSRestoreBehaviorUnspecified PBSRestoreBehavior = iota + PBSRestoreBehaviorMerge + PBSRestoreBehaviorClean +) + +func (b PBSRestoreBehavior) String() string { + switch b { + case PBSRestoreBehaviorMerge: + return "merge" + case PBSRestoreBehaviorClean: + return "clean-1to1" + default: + return "unspecified" + } +} + +func (b PBSRestoreBehavior) DisplayName() string { + switch b { + case PBSRestoreBehaviorMerge: + return "Merge (existing PBS)" + case PBSRestoreBehaviorClean: + return "Clean 1:1 (fresh PBS install)" + default: + return "Unspecified" + } +} diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index 1cda623..c469deb 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -9,11 +9,10 @@ import ( "path/filepath" "strings" - "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/logging" ) -func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string, dryRun bool) (err error) { +func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { if plan == nil || plan.SystemType != SystemTypePBS { return nil } @@ -41,18 +40,21 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, return nil } - mode := normalizePBSApplyMode(cfg) - strict := pbsStrictRestore(cfg) + behavior := plan.PBSRestoreBehavior + strict := behavior == PBSRestoreBehaviorClean + allowFileFallback := behavior == PBSRestoreBehaviorClean - needsAPI := mode != pbsApplyModeFile && (plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs")) + needsAPI := plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs") + apiAvailable := false if needsAPI { if err := ensurePBSServicesForAPI(ctx, logger); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply unavailable; falling back to file-based staged apply: %v", err) - mode = pbsApplyModeFile + if allowFileFallback { + logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", err) } else { - return err + logger.Warning("PBS API apply unavailable; skipping API-applied PBS categories (merge mode): %v", err) } + } else { + apiAvailable = true } } @@ -70,24 +72,22 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, } } - if mode != pbsApplyModeFile { + if apiAvailable { if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: traffic-control failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: traffic-control failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") - } else { - return err } } if err := applyPBSNodeCfgViaAPI(ctx, logger, stageRoot); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: node config failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: node config failed: %v", err) + if allowFileFallback { + 
logger.Warning("PBS staged apply: falling back to file-based node.cfg") _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") - } else { - return err } } - } else { + } else if allowFileFallback { for _, rel := range []string{ "etc/proxmox-backup/traffic-control.cfg", "etc/proxmox-backup/node.cfg", @@ -96,84 +96,86 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, logger.Warning("PBS staged apply: %s: %v", rel, err) } } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping node.cfg/traffic-control.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("datastore_pbs") { - if mode != pbsApplyModeFile { + if apiAvailable { if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: s3.cfg failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: s3.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based s3.cfg") _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) - } else { - return err } } if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: datastore.cfg failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: datastore.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) - } else { - return err } } - } else { + } else if allowFileFallback { if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: s3.cfg: %v", err) } if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: datastore.cfg: %v", err) } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping datastore.cfg/s3.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_remotes") { - if mode != pbsApplyModeFile { + if apiAvailable { if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: remote.cfg failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: remote.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based remote.cfg") _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) - } else { - return err } } - } else { + } else if allowFileFallback { if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: remote.cfg: %v", err) } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping remote.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_jobs") { - if mode != pbsApplyModeFile { + if apiAvailable { if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: sync jobs failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: sync jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } else { - return err } } if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - 
logger.Warning("PBS API apply: verification jobs failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: verification jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } else { - return err } } if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { - logger.Warning("PBS API apply: prune jobs failed; falling back to file-based: %v", err) + logger.Warning("PBS API apply: prune jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } else { - return err } } - } else { + } else if allowFileFallback { if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: job configs: %v", err) } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping sync/verification/prune configs: merge mode requires PBS API apply") } } diff --git a/internal/orchestrator/restore_notifications.go b/internal/orchestrator/restore_notifications.go index 16c1acd..3774a5d 100644 --- a/internal/orchestrator/restore_notifications.go +++ b/internal/orchestrator/restore_notifications.go @@ -8,7 +8,6 @@ import ( "path/filepath" "strings" - "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/logging" ) @@ -24,7 +23,7 @@ type proxmoxNotificationSection struct { RedactFlags []string } -func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, cfg *config.Config, stageRoot string, dryRun bool) (err error) { +func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { if plan == nil { return nil } @@ -57,26 +56,28 @@ func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logge if !plan.HasCategoryID("pbs_notifications") { return nil } - mode := normalizePBSApplyMode(cfg) - strict := pbsStrictRestore(cfg) - if mode == pbsApplyModeFile { - return applyPBSNotificationsFromStage(ctx, logger, stageRoot) - } + behavior := plan.PBSRestoreBehavior + strict := behavior == PBSRestoreBehaviorClean + allowFileFallback := behavior == PBSRestoreBehaviorClean + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { - if mode == pbsApplyModeAuto { + if allowFileFallback { logger.Warning("PBS notifications API apply unavailable; falling back to file-based apply: %v", err) return applyPBSNotificationsFromStage(ctx, logger, stageRoot) } - return err + logger.Warning("PBS notifications API apply unavailable; skipping apply (merge mode): %v", err) + return nil } + if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { - if mode == pbsApplyModeAuto { + if allowFileFallback { logger.Warning("PBS notifications API apply failed; falling back to file-based apply: %v", err) return applyPBSNotificationsFromStage(ctx, logger, stageRoot) } - return err + logger.Warning("PBS notifications API apply failed; skipping apply (merge mode): %v", err) + return nil } - logger.Info("PBS notifications applied via API") + logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) return nil case SystemTypePVE: if !plan.HasCategoryID("pve_notifications") { diff --git a/internal/orchestrator/restore_plan.go b/internal/orchestrator/restore_plan.go 
index 3c88564..6d54716 100644 --- a/internal/orchestrator/restore_plan.go +++ b/internal/orchestrator/restore_plan.go @@ -13,6 +13,7 @@ type RestorePlan struct { NormalCategories []Category StagedCategories []Category ExportCategories []Category + PBSRestoreBehavior PBSRestoreBehavior ClusterBackup bool ClusterSafeMode bool NeedsClusterRestore bool @@ -80,4 +81,3 @@ func (p *RestorePlan) HasCategoryID(id string) bool { } return hasCategoryID(p.NormalCategories, id) || hasCategoryID(p.StagedCategories, id) || hasCategoryID(p.ExportCategories, id) } - diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index be03115..4eaaf94 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -143,6 +143,90 @@ func selectRestoreModeTUI(systemType SystemType, configPath, buildSig, backupSum return selected, nil } +func selectPBSRestoreBehaviorTUI(configPath, buildSig, backupSummary string) (PBSRestoreBehavior, error) { + app := newTUIApp() + var selected PBSRestoreBehavior + var aborted bool + + list := tview.NewList().ShowSecondaryText(true) + list.SetMainTextColor(tcell.ColorWhite). + SetSelectedTextColor(tcell.ColorWhite). + SetSelectedBackgroundColor(tui.ProxmoxOrange) + + list.AddItem( + "1) Merge (existing PBS)", + "Restore onto an already operational PBS. Avoids API-side deletions of existing PBS objects that are not in the backup.", + 0, + nil, + ) + list.AddItem( + "2) Clean 1:1 (fresh PBS install)", + "Restore onto a new, clean PBS installation. Tries to make PBS configuration match the backup (may remove objects not in the backup).", + 0, + nil, + ) + + list.SetSelectedFunc(func(index int, mainText, secondaryText string, shortcut rune) { + switch index { + case 0: + selected = PBSRestoreBehaviorMerge + case 1: + selected = PBSRestoreBehaviorClean + default: + selected = PBSRestoreBehaviorUnspecified + } + if selected != PBSRestoreBehaviorUnspecified { + app.Stop() + } + }) + list.SetDoneFunc(func() { + aborted = true + app.Stop() + }) + + form := components.NewForm(app) + listItem := components.NewListFormItem(list). + SetLabel("Select PBS restore behavior"). + SetFieldHeight(6) + form.Form.AddFormItem(listItem) + form.Form.SetFocus(0) + + form.SetOnCancel(func() { + aborted = true + }) + form.AddCancelButton("Cancel") + enableFormNavigation(form, nil) + + // Selected backup summary + summaryText := strings.TrimSpace(backupSummary) + var summaryView tview.Primitive + if summaryText != "" { + summary := tview.NewTextView(). + SetText(fmt.Sprintf("Selected backup: %s", summaryText)). + SetWrap(true). + SetTextColor(tcell.ColorWhite) + summary.SetBorder(false) + summaryView = summary + } else { + summaryView = tview.NewBox() + } + + content := tview.NewFlex(). + SetDirection(tview.FlexRow). + AddItem(summaryView, 2, 0, false). 
+ AddItem(form.Form, 0, 1, true) + + page := buildRestoreWizardPage("PBS restore behavior", configPath, buildSig, content) + app.SetRoot(page, true).SetFocus(form.Form) + if err := app.Run(); err != nil { + return PBSRestoreBehaviorUnspecified, err + } + if aborted || selected == PBSRestoreBehaviorUnspecified { + return PBSRestoreBehaviorUnspecified, ErrRestoreAborted + } + return selected, nil +} + func filterAndSortCategoriesForSystem(available []Category, systemType SystemType) []Category { relevant := make([]Category, 0, len(available)) for _, cat := range available { diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index b691bcc..ce0bf9b 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -129,6 +129,22 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l plan := PlanRestore(candidate.Manifest, selectedCategories, systemType, mode) + if plan.SystemType == SystemTypePBS && + (plan.HasCategoryID("pbs_host") || + plan.HasCategoryID("datastore_pbs") || + plan.HasCategoryID("pbs_remotes") || + plan.HasCategoryID("pbs_jobs") || + plan.HasCategoryID("pbs_notifications") || + plan.HasCategoryID("pbs_access_control") || + plan.HasCategoryID("pbs_tape")) { + behavior, err := ui.SelectPBSRestoreBehavior(ctx) + if err != nil { + return err + } + plan.PBSRestoreBehavior = behavior + logger.Info("PBS restore behavior: %s", behavior.DisplayName()) + } + clusterBackup := strings.EqualFold(strings.TrimSpace(candidate.Manifest.ClusterMode), "cluster") if plan.NeedsClusterRestore && clusterBackup { logger.Info("Backup marked as cluster node; enabling guarded restore options for pve_cluster") @@ -508,12 +524,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } logger.Info("") - if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, cfg, stageRoot, cfg.DryRun); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err - } - restoreHadWarnings = true - logger.Warning("PBS staged config apply: %v", err) + if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("PBS staged config apply: %v", err) } if err := maybeApplyPVEConfigsFromStage(ctx, logger, plan, stageRoot, destRoot, cfg.DryRun); err != nil { if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { @@ -559,12 +575,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Warning("Access control staged apply: %v", err) } } - if err := maybeApplyNotificationsFromStage(ctx, logger, plan, cfg, stageRoot, cfg.DryRun); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err - } - restoreHadWarnings = true - logger.Warning("Notifications staged apply: %v", err) + if err := maybeApplyNotificationsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("Notifications staged apply: %v", err) } } diff --git a/internal/orchestrator/restore_workflow_ui_helpers_test.go b/internal/orchestrator/restore_workflow_ui_helpers_test.go index dc03218..f2a0372 100644 --- a/internal/orchestrator/restore_workflow_ui_helpers_test.go +++ 
b/internal/orchestrator/restore_workflow_ui_helpers_test.go @@ -7,40 +7,46 @@ import ( ) type fakeRestoreWorkflowUI struct { - mode RestoreMode - categories []Category - confirmRestore bool - confirmCompatible bool - clusterMode ClusterRestoreMode - continueNoSafety bool + mode RestoreMode + categories []Category + pbsBehavior PBSRestoreBehavior + confirmRestore bool + confirmCompatible bool + clusterMode ClusterRestoreMode + continueNoSafety bool continuePBSServices bool - confirmFstabMerge bool - exportNode string - applyVMConfigs bool - applyStorageCfg bool - applyDatacenterCfg bool - confirmAction bool - networkCommit bool - - modeErr error - categoriesErr error - confirmRestoreErr error - confirmCompatibleErr error - clusterModeErr error - continueNoSafetyErr error + confirmFstabMerge bool + exportNode string + applyVMConfigs bool + applyStorageCfg bool + applyDatacenterCfg bool + confirmAction bool + networkCommit bool + + modeErr error + categoriesErr error + pbsBehaviorErr error + confirmRestoreErr error + confirmCompatibleErr error + clusterModeErr error + continueNoSafetyErr error continuePBSServicesErr error - confirmFstabMergeErr error - confirmActionErr error - repairNICNamesErr error - networkCommitErr error + confirmFstabMergeErr error + confirmActionErr error + repairNICNamesErr error + networkCommitErr error } func (f *fakeRestoreWorkflowUI) RunTask(ctx context.Context, title, initialMessage string, run func(ctx context.Context, report ProgressReporter) error) error { return run(ctx, nil) } -func (f *fakeRestoreWorkflowUI) ShowMessage(ctx context.Context, title, message string) error { return nil } -func (f *fakeRestoreWorkflowUI) ShowError(ctx context.Context, title, message string) error { return nil } +func (f *fakeRestoreWorkflowUI) ShowMessage(ctx context.Context, title, message string) error { + return nil +} +func (f *fakeRestoreWorkflowUI) ShowError(ctx context.Context, title, message string) error { + return nil +} func (f *fakeRestoreWorkflowUI) SelectBackupSource(ctx context.Context, options []decryptPathOption) (decryptPathOption, error) { return decryptPathOption{}, fmt.Errorf("unexpected SelectBackupSource call") @@ -62,7 +68,16 @@ func (f *fakeRestoreWorkflowUI) SelectCategories(ctx context.Context, available return f.categories, f.categoriesErr } -func (f *fakeRestoreWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { return nil } +func (f *fakeRestoreWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + if f.pbsBehavior == PBSRestoreBehaviorUnspecified && f.pbsBehaviorErr == nil { + return PBSRestoreBehaviorClean, nil + } + return f.pbsBehavior, f.pbsBehaviorErr +} + +func (f *fakeRestoreWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { + return nil +} func (f *fakeRestoreWorkflowUI) ConfirmRestore(ctx context.Context) (bool, error) { return f.confirmRestore, f.confirmRestoreErr @@ -115,4 +130,3 @@ func (f *fakeRestoreWorkflowUI) RepairNICNames(ctx context.Context, archivePath func (f *fakeRestoreWorkflowUI) PromptNetworkCommit(ctx context.Context, remaining time.Duration, health networkHealthReport, nicRepair *nicRepairResult, diagnosticsDir string) (bool, error) { return f.networkCommit, f.networkCommitErr } - diff --git a/internal/orchestrator/workflow_ui.go b/internal/orchestrator/workflow_ui.go index e951c5c..03db940 100644 --- a/internal/orchestrator/workflow_ui.go +++ b/internal/orchestrator/workflow_ui.go @@ -55,6 +55,7 @@ type 
RestoreWorkflowUI interface { PromptDecryptSecret(ctx context.Context, displayName, previousError string) (string, error) SelectRestoreMode(ctx context.Context, systemType SystemType) (RestoreMode, error) SelectCategories(ctx context.Context, available []Category, systemType SystemType) ([]Category, error) + SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error ConfirmRestore(ctx context.Context) (bool, error) diff --git a/internal/orchestrator/workflow_ui_cli.go b/internal/orchestrator/workflow_ui_cli.go index 940c3b1..1d303c7 100644 --- a/internal/orchestrator/workflow_ui_cli.go +++ b/internal/orchestrator/workflow_ui_cli.go @@ -184,6 +184,32 @@ func (u *cliWorkflowUI) SelectCategories(ctx context.Context, available []Catego return ShowCategorySelectionMenuWithReader(ctx, u.reader, u.logger, available, systemType) } +func (u *cliWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + fmt.Println() + fmt.Println("PBS restore reconciliation:") + fmt.Println(" [1] Merge (existing PBS) - Restore onto an already operational PBS (avoids API-side deletions of existing PBS objects not in the backup).") + fmt.Println(" [2] Clean 1:1 (fresh PBS install) - Restore onto a new, clean PBS and try to make configuration match the backup (may remove existing PBS objects not in the backup).") + fmt.Println(" [0] Exit") + + for { + fmt.Print("Choice: ") + line, err := input.ReadLineWithContext(ctx, u.reader) + if err != nil { + return PBSRestoreBehaviorUnspecified, err + } + switch strings.TrimSpace(line) { + case "1": + return PBSRestoreBehaviorMerge, nil + case "2": + return PBSRestoreBehaviorClean, nil + case "0": + return PBSRestoreBehaviorUnspecified, ErrRestoreAborted + default: + fmt.Println("Please enter 1, 2 or 0.") + } + } +} + func (u *cliWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { ShowRestorePlan(u.logger, config) return nil diff --git a/internal/orchestrator/workflow_ui_tui_restore.go b/internal/orchestrator/workflow_ui_tui_restore.go index 7c53681..350bd82 100644 --- a/internal/orchestrator/workflow_ui_tui_restore.go +++ b/internal/orchestrator/workflow_ui_tui_restore.go @@ -21,6 +21,10 @@ func (u *tuiWorkflowUI) SelectCategories(ctx context.Context, available []Catego return selectCategoriesTUI(available, systemType, u.configPath, u.buildSig) } +func (u *tuiWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + return selectPBSRestoreBehaviorTUI(u.configPath, u.buildSig, strings.TrimSpace(u.selectedBackupSummary)) +} + func (u *tuiWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { return showRestorePlanTUI(config, u.configPath, u.buildSig) } @@ -147,4 +151,3 @@ func (u *tuiWorkflowUI) ConfirmApplyDatacenterCfg(ctx context.Context, datacente message := fmt.Sprintf("Datacenter configuration found:\n\n%s\n\nApply datacenter.cfg via pvesh now?", strings.TrimSpace(datacenterCfgPath)) return promptYesNoTUIFunc("Apply datacenter.cfg", u.configPath, u.buildSig, message, "Apply via API", "Skip") } - From ac01261dd68dc6451117cc233830f7b6987e8068 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 01:25:24 +0100 Subject: [PATCH 04/24] Add ctx cancellation and refactor restore code Collector: simplify Flush error assignment and add context cancellation checks (ctx.Err()) in aggregateBackupHistory and aggregateReplicationStatus, including periodic 
checks inside loops to allow early cancellation. Restore/Access Control: ensure ctx is non-nil and check ctx.Err() early in applyPBSAccessControlFromStage and applyPVEAccessControlFromStage. Remove unused logger parameter from applyPBSACLSectionFormat and applyPBSACLLineFormat (they no longer accept a logger). Restore/SDN: reserve ctx (_ = ctx) for future use and tighten error handling when stat'ing staged SDN (remove redundant nil check in the else-if condition). Restore/TUI: simplify NIC repair branch by removing a redundant nil check on plan.Mapping and delete the unused promptOkTUI helper. --- internal/backup/collector_pve.go | 14 ++++++++- .../orchestrator/restore_access_control.go | 22 ++++++++++--- internal/orchestrator/restore_sdn.go | 4 ++- internal/orchestrator/restore_tui.go | 31 +------------------ 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 756356a..f8a6efa 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -1326,7 +1326,7 @@ func (pw *patternWriter) Write(path string, info os.FileInfo) error { func (pw *patternWriter) Close() error { var err error if pw.writer != nil { - if flushErr := pw.writer.Flush(); flushErr != nil && err == nil { + if flushErr := pw.writer.Flush(); flushErr != nil { err = flushErr } } @@ -1556,6 +1556,9 @@ func (c *Collector) copyIfExists(source, target, description string) error { } func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target string) error { + if err := ctx.Err(); err != nil { + return err + } entries, err := os.ReadDir(jobsDir) if err != nil { return err @@ -1563,6 +1566,9 @@ func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target var buffers []json.RawMessage for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } if entry.IsDir() { continue } @@ -1599,6 +1605,9 @@ func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target } func (c *Collector) aggregateReplicationStatus(ctx context.Context, replicationDir, target string) error { + if err := ctx.Err(); err != nil { + return err + } entries, err := os.ReadDir(replicationDir) if err != nil { return err @@ -1606,6 +1615,9 @@ func (c *Collector) aggregateReplicationStatus(ctx context.Context, replicationD var buffers []json.RawMessage for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } if entry.IsDir() { continue } diff --git a/internal/orchestrator/restore_access_control.go b/internal/orchestrator/restore_access_control.go index efdda05..118168f 100644 --- a/internal/orchestrator/restore_access_control.go +++ b/internal/orchestrator/restore_access_control.go @@ -91,6 +91,13 @@ func maybeApplyAccessControlFromStage(ctx context.Context, logger *logging.Logge } func applyPBSAccessControlFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + done := logging.DebugStart(logger, "pbs access control apply", "stage=%s", stageRoot) defer func() { done(err) }() @@ -283,17 +290,17 @@ func applyPBSACLFromStage(logger *logging.Logger, stagedACL string) error { // - header-style (section + indented keys) // - colon-delimited line format (acl::::) if pbsConfigHasHeader(raw) { - return applyPBSACLSectionFormat(logger, raw) + return applyPBSACLSectionFormat(raw) } if isPBSACLLineFormat(raw) { - return 
applyPBSACLLineFormat(logger, raw) + return applyPBSACLLineFormat(raw) } logger.Warning("PBS access control: staged acl.cfg has unknown format; skipping apply") return nil } -func applyPBSACLSectionFormat(logger *logging.Logger, raw string) error { +func applyPBSACLSectionFormat(raw string) error { backupSections, err := parseProxmoxNotificationSections(raw) if err != nil { return fmt.Errorf("parse staged acl.cfg: %w", err) @@ -369,7 +376,7 @@ func parsePBSACLLine(line string) (pbsACLLine, bool) { }, true } -func applyPBSACLLineFormat(logger *logging.Logger, raw string) error { +func applyPBSACLLineFormat(raw string) error { var outLines []string var hasRootAdmin bool @@ -667,6 +674,13 @@ func mustMarshalRaw(v any) json.RawMessage { } func applyPVEAccessControlFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + done := logging.DebugStart(logger, "pve access control apply", "stage=%s", stageRoot) defer func() { done(err) }() diff --git a/internal/orchestrator/restore_sdn.go b/internal/orchestrator/restore_sdn.go index 5cd81bf..01c9c59 100644 --- a/internal/orchestrator/restore_sdn.go +++ b/internal/orchestrator/restore_sdn.go @@ -12,6 +12,8 @@ import ( ) func maybeApplyPVESDNFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { + _ = ctx // reserved for future timeouts/cancellation hooks + if plan == nil || plan.SystemType != SystemTypePVE || !plan.HasCategoryID("pve_sdn") { return nil } @@ -94,7 +96,7 @@ func applyPVESDNFromStage(logger *logging.Logger, stageRoot string) (applied []s applied = append(applied, destSDN) } } - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return applied, fmt.Errorf("stat staged sdn %s: %w", stageSDN, err) } diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index 4eaaf94..8b41afd 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -412,7 +412,7 @@ func maybeRepairNICNamesTUI(ctx context.Context, logger *logging.Logger, archive return &nicRepairResult{AppliedAt: nowRestore(), SkippedReason: plan.SkippedReason} } - if plan != nil && !plan.Mapping.IsEmpty() { + if !plan.Mapping.IsEmpty() { logging.DebugStep(logger, "NIC repair", "Detect persistent NIC naming overrides (udev/systemd)") overrides, err := detectNICNamingOverrideRules(logger) if err != nil { @@ -840,35 +840,6 @@ func promptYesNoTUIWithCountdown(ctx context.Context, logger *logging.Logger, ti return result, nil } -func promptOkTUI(title, configPath, buildSig, message, okLabel string) error { - app := newTUIApp() - - infoText := tview.NewTextView(). - SetText(message). - SetWrap(true). - SetTextColor(tcell.ColorWhite). - SetDynamicColors(true) - - form := components.NewForm(app) - form.SetOnSubmit(func(values map[string]string) error { - return nil - }) - form.SetOnCancel(func() {}) - form.AddSubmitButton(okLabel) - form.AddCancelButton("Close") - enableFormNavigation(form, nil) - - content := tview.NewFlex(). - SetDirection(tview.FlexRow). - AddItem(infoText, 0, 1, false). 
- AddItem(form.Form, 3, 0, true) - - page := buildRestoreWizardPage(title, configPath, buildSig, content) - form.SetParentView(page) - - return app.SetRoot(page, true).SetFocus(form.Form).Run() -} - func promptNetworkCommitTUI(timeout time.Duration, health networkHealthReport, nicRepair *nicRepairResult, diagnosticsDir, configPath, buildSig string) (bool, error) { app := newTUIApp() var committed bool From ce01f3dee64c16e2c1d3be63aa545a6c58692a6e Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 01:38:08 +0100 Subject: [PATCH 05/24] Refactor restore functions, tighten error checks Simplify and tighten various restore-related internals: - Consolidate service timeout variable formatting. - Replace redundant "err != nil && !errors.Is(...)" checks with a single "!errors.Is(err, os.ErrNotExist)" in fs_atomic.go and restore_firewall.go. - Improve confirmRestoreAction destination handling: clean and display the actual restore destination (root vs a subdirectory) and clarify warnings. - Remove unused parameters: detectNodeForVM no longer accepts vmEntry and restartPVEFirewallService/extractHardlink no longer accept a logger; update callers accordingly. - Adjust extractHardlink signature and callers/tests to match; update tests to remove logger construction and assert behavior against the new signatures. - Minor test updates and formatting tweaks. These changes reduce unused parameters, improve user-facing messaging during restore, and simplify error checks. Tests were updated to reflect the new function signatures. --- internal/orchestrator/fs_atomic.go | 4 +- internal/orchestrator/restore.go | 40 ++++++++++++-------- internal/orchestrator/restore_errors_test.go | 6 +-- internal/orchestrator/restore_firewall.go | 6 +-- internal/orchestrator/restore_test.go | 14 ++----- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/internal/orchestrator/fs_atomic.go b/internal/orchestrator/fs_atomic.go index 476c845..879f0ce 100644 --- a/internal/orchestrator/fs_atomic.go +++ b/internal/orchestrator/fs_atomic.go @@ -75,7 +75,7 @@ func ensureDirExistsWithInheritedMeta(dir string) error { return nil } return fmt.Errorf("path exists but is not a directory: %s", dir) - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("stat %s: %w", dir, err) } @@ -123,7 +123,7 @@ func ensureDirExistsWithInheritedMeta(dir string) error { continue } return fmt.Errorf("path exists but is not a directory: %s", p) - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("stat %s: %w", p, err) } diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 0369703..2f14d20 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -26,16 +26,15 @@ import ( var ErrRestoreAborted = errors.New("restore workflow aborted by user") var ( - serviceStopTimeout = 45 * time.Second - serviceStopNoBlockTimeout = 15 * time.Second - serviceStartTimeout = 30 * time.Second - serviceVerifyTimeout = 30 * time.Second - serviceStatusCheckTimeout = 5 * time.Second - servicePollInterval = 500 * time.Millisecond - serviceRetryDelay = 500 * time.Millisecond - restoreLogSequence uint64 - restoreGlob = filepath.Glob - prepareDecryptedBackupFunc = prepareDecryptedBackup + serviceStopTimeout = 45 * time.Second + serviceStopNoBlockTimeout = 15 * time.Second + serviceStartTimeout = 30 * time.Second + serviceVerifyTimeout = 30 * time.Second + 
serviceStatusCheckTimeout = 5 * time.Second + servicePollInterval = 500 * time.Millisecond + serviceRetryDelay = 500 * time.Millisecond + restoreLogSequence uint64 + restoreGlob = filepath.Glob ) // RestoreAbortInfo contains information about an aborted restore with network rollback. @@ -763,8 +762,17 @@ func confirmRestoreAction(ctx context.Context, reader *bufio.Reader, cand *decry manifest := cand.Manifest fmt.Println() fmt.Printf("Selected backup: %s (%s)\n", cand.DisplayBase, manifest.CreatedAt.Format("2006-01-02 15:04:05")) - fmt.Println("Restore destination: / (system root; original paths will be preserved)") - fmt.Println("WARNING: This operation will overwrite configuration files on this system.") + cleanDest := filepath.Clean(strings.TrimSpace(dest)) + if cleanDest == "" || cleanDest == "." { + cleanDest = string(os.PathSeparator) + } + if cleanDest == string(os.PathSeparator) { + fmt.Println("Restore destination: / (system root; original paths will be preserved)") + fmt.Println("WARNING: This operation will overwrite configuration files on this system.") + } else { + fmt.Printf("Restore destination: %s (original paths will be preserved under this directory)\n", cleanDest) + fmt.Printf("WARNING: This operation will overwrite existing files under %s.\n", cleanDest) + } fmt.Println("Type RESTORE to proceed or 0 to cancel.") for { @@ -978,7 +986,7 @@ func applyVMConfigs(ctx context.Context, entries []vmEntry, logger *logging.Logg logger.Warning("VM apply aborted: %v", err) return applied, failed } - target := fmt.Sprintf("/nodes/%s/%s/%s/config", detectNodeForVM(vm), vm.Kind, vm.VMID) + target := fmt.Sprintf("/nodes/%s/%s/%s/config", detectNodeForVM(), vm.Kind, vm.VMID) args := []string{"set", target, "--filename", vm.Path} if err := runPvesh(ctx, logger, args); err != nil { logger.Warning("Failed to apply %s (vmid=%s kind=%s): %v", target, vm.VMID, vm.Kind, err) @@ -995,7 +1003,7 @@ func applyVMConfigs(ctx context.Context, entries []vmEntry, logger *logging.Logg return applied, failed } -func detectNodeForVM(vm vmEntry) string { +func detectNodeForVM() string { host, _ := os.Hostname() host = shortHost(host) if host != "" { @@ -1564,7 +1572,7 @@ func extractTarEntry(tarReader *tar.Reader, header *tar.Header, destRoot string, case tar.TypeSymlink: return extractSymlink(target, header, cleanDestRoot, logger) case tar.TypeLink: - return extractHardlink(target, header, cleanDestRoot, logger) + return extractHardlink(target, header, cleanDestRoot) default: logger.Debug("Skipping unsupported file type %d: %s", header.Typeflag, header.Name) return nil @@ -1715,7 +1723,7 @@ func extractSymlink(target string, header *tar.Header, destRoot string, logger * } // extractHardlink creates a hard link -func extractHardlink(target string, header *tar.Header, destRoot string, logger *logging.Logger) error { +func extractHardlink(target string, header *tar.Header, destRoot string) error { // Validate hard link target linkName := header.Linkname diff --git a/internal/orchestrator/restore_errors_test.go b/internal/orchestrator/restore_errors_test.go index d324e4c..cad33a8 100644 --- a/internal/orchestrator/restore_errors_test.go +++ b/internal/orchestrator/restore_errors_test.go @@ -1011,8 +1011,7 @@ func TestExtractHardlink_AbsoluteTargetRejectedError(t *testing.T) { Typeflag: tar.TypeLink, } - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - err := extractHardlink("/tmp/link", header, "/tmp", logger) + err := extractHardlink("/tmp/link", header, "/tmp") if err == nil || 
!strings.Contains(err.Error(), "absolute hardlink target not allowed") { t.Fatalf("expected absolute target error, got: %v", err) } @@ -1043,8 +1042,7 @@ func TestExtractHardlink_LinkCreationFails(t *testing.T) { } linkPath := filepath.Join(fakeFS.Root, "link") - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - err := extractHardlink(linkPath, header, fakeFS.Root, logger) + err := extractHardlink(linkPath, header, fakeFS.Root) if err == nil || !strings.Contains(err.Error(), "hardlink") { t.Fatalf("expected link creation error, got: %v", err) } diff --git a/internal/orchestrator/restore_firewall.go b/internal/orchestrator/restore_firewall.go index 9e655c1..64c7419 100644 --- a/internal/orchestrator/restore_firewall.go +++ b/internal/orchestrator/restore_firewall.go @@ -233,7 +233,7 @@ func maybeApplyPVEFirewallWithUI( return nil } - if err := restartPVEFirewallService(ctx, logger); err != nil { + if err := restartPVEFirewallService(ctx); err != nil { logger.Warning("PVE firewall restore: reload/restart failed: %v", err) } @@ -300,7 +300,7 @@ func applyPVEFirewallFromStage(logger *logging.Logger, stageRoot string) (applie applied = append(applied, destFirewall) } } - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return applied, fmt.Errorf("stat staged firewall config %s: %w", stageFirewall, err) } @@ -382,7 +382,7 @@ func selectStageHostFirewall(logger *logging.Logger, stageRoot string) (path str return "", "", false, nil } -func restartPVEFirewallService(ctx context.Context, logger *logging.Logger) error { +func restartPVEFirewallService(ctx context.Context) error { timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() diff --git a/internal/orchestrator/restore_test.go b/internal/orchestrator/restore_test.go index c9d008d..80a1f10 100644 --- a/internal/orchestrator/restore_test.go +++ b/internal/orchestrator/restore_test.go @@ -512,28 +512,26 @@ func TestExtractDirectory_Success(t *testing.T) { // -------------------------------------------------------------------------- func TestExtractHardlink_AbsoluteTargetRejected(t *testing.T) { - logger := logging.New(types.LogLevelDebug, false) header := &tar.Header{ Name: "link", Linkname: "/absolute/path", Typeflag: tar.TypeLink, } - err := extractHardlink("/tmp/dest", header, "/tmp/dest", logger) + err := extractHardlink("/tmp/dest", header, "/tmp/dest") if err == nil || !strings.Contains(err.Error(), "absolute hardlink target not allowed") { t.Fatalf("expected absolute target error, got: %v", err) } } func TestExtractHardlink_EscapesRoot(t *testing.T) { - logger := logging.New(types.LogLevelDebug, false) header := &tar.Header{ Name: "link", Linkname: "../../../etc/passwd", Typeflag: tar.TypeLink, } - err := extractHardlink("/tmp/dest/link", header, "/tmp/dest", logger) + err := extractHardlink("/tmp/dest/link", header, "/tmp/dest") if err == nil || !strings.Contains(err.Error(), "escapes root") { t.Fatalf("expected escape error, got: %v", err) } @@ -544,7 +542,6 @@ func TestExtractHardlink_Success(t *testing.T) { t.Cleanup(func() { restoreFS = orig }) restoreFS = osFS{} - logger := logging.New(types.LogLevelDebug, false) destRoot := t.TempDir() originalFile := filepath.Join(destRoot, "original.txt") linkFile := filepath.Join(destRoot, "link.txt") @@ -559,7 +556,7 @@ func TestExtractHardlink_Success(t *testing.T) { Typeflag: tar.TypeLink, } - if err := extractHardlink(linkFile, header, destRoot, logger); err != nil { + if err := 
extractHardlink(linkFile, header, destRoot); err != nil { t.Fatalf("extractHardlink failed: %v", err) } @@ -1002,10 +999,7 @@ func TestReadVMName_FileNotFound(t *testing.T) { // -------------------------------------------------------------------------- func TestDetectNodeForVM_ReturnsHostname(t *testing.T) { - entry := vmEntry{ - Path: "/export/etc/pve/nodes/node1/qemu-server/100.conf", - } - node := detectNodeForVM(entry) + node := detectNodeForVM() // detectNodeForVM returns the current hostname, not the node from path if node == "" { t.Fatalf("expected non-empty node from hostname") From 09eccebd850790152cd71defd47752f1df0b9351 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 01:58:28 +0100 Subject: [PATCH 06/24] Add prefilter-manual CLI and test adjustments Introduce a new cmd/prefilter-manual command to run the backup prefilter manually with configurable root, max-size and log-level flags. Tighten and clarify unit tests: adjust assertion message in collector_collectall_test.go, reformat table-driven cases in orchestrator/decrypt_test.go, and add a lint ignore for an intentional nil-context test in copyRawArtifactsToWorkdir. --- cmd/prefilter-manual/main.go | 59 ++++++++++++++ internal/backup/collector_collectall_test.go | 4 +- internal/orchestrator/decrypt_test.go | 85 ++++++++++---------- 3 files changed, 104 insertions(+), 44 deletions(-) create mode 100644 cmd/prefilter-manual/main.go diff --git a/cmd/prefilter-manual/main.go b/cmd/prefilter-manual/main.go new file mode 100644 index 0000000..6f0d1a8 --- /dev/null +++ b/cmd/prefilter-manual/main.go @@ -0,0 +1,59 @@ +package main + +import ( + "context" + "flag" + "os" + "path/filepath" + "strings" + + "github.com/tis24dev/proxsave/internal/backup" + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func parseLogLevel(raw string) types.LogLevel { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "debug": + return types.LogLevelDebug + case "info", "": + return types.LogLevelInfo + case "warning", "warn": + return types.LogLevelWarning + case "error": + return types.LogLevelError + default: + return types.LogLevelInfo + } +} + +func main() { + var ( + root string + maxSize int64 + levelLabel string + ) + + flag.StringVar(&root, "root", "/tmp/test_prefilter", "Root directory to run prefilter on") + flag.Int64Var(&maxSize, "max-size", 8*1024*1024, "Max file size (bytes) to prefilter") + flag.StringVar(&levelLabel, "log-level", "info", "Log level: debug|info|warn|error") + flag.Parse() + + root = filepath.Clean(strings.TrimSpace(root)) + if root == "" || root == "." 
{ + root = string(os.PathSeparator) + } + + logger := logging.New(parseLogLevel(levelLabel), false) + logger.SetOutput(os.Stdout) + + cfg := backup.OptimizationConfig{ + EnablePrefilter: true, + PrefilterMaxFileSizeBytes: maxSize, + } + + if err := backup.ApplyOptimizations(context.Background(), logger, root, cfg); err != nil { + logger.Error("Prefilter failed: %v", err) + os.Exit(1) + } +} diff --git a/internal/backup/collector_collectall_test.go b/internal/backup/collector_collectall_test.go index aca9062..1bac94e 100644 --- a/internal/backup/collector_collectall_test.go +++ b/internal/backup/collector_collectall_test.go @@ -59,9 +59,9 @@ func TestCollectorCollectAll_PVEBranchWrapsCollectionError(t *testing.T) { collector := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) err := collector.CollectAll(context.Background()) if err == nil { - t.Fatalf("expected error, got %v", err) + t.Fatalf("expected error, got nil") } - if err == nil || !strings.Contains(err.Error(), "PVE collection failed:") { + if !strings.Contains(err.Error(), "PVE collection failed:") { t.Fatalf("expected wrapped PVE collection error, got %v", err) } } diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index d663df7..19faeea 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -34,19 +34,19 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths []string wantLabel []string }{ - { - name: "all paths enabled", - cfg: &config.Config{ - BackupPath: "/backup/local", - SecondaryEnabled: true, - SecondaryPath: "/backup/secondary", - CloudEnabled: true, - CloudRemote: "/backup/cloud", - }, - wantCount: 3, - wantPaths: []string{"/backup/local", "/backup/secondary", "/backup/cloud"}, - wantLabel: []string{"Local backups", "Secondary backups", "Cloud backups"}, + { + name: "all paths enabled", + cfg: &config.Config{ + BackupPath: "/backup/local", + SecondaryEnabled: true, + SecondaryPath: "/backup/secondary", + CloudEnabled: true, + CloudRemote: "/backup/cloud", }, + wantCount: 3, + wantPaths: []string{"/backup/local", "/backup/secondary", "/backup/cloud"}, + wantLabel: []string{"Local backups", "Secondary backups", "Cloud backups"}, + }, { name: "only local path", cfg: &config.Config{ @@ -91,28 +91,28 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths: []string{"/backup/local"}, wantLabel: []string{"Local backups"}, }, - { - name: "cloud with rclone remote included", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "gdrive:backups", // rclone remote - }, - wantCount: 2, - wantPaths: []string{"/backup/local", "gdrive:backups"}, - wantLabel: []string{"Local backups", "Cloud backups (rclone)"}, + { + name: "cloud with rclone remote included", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: "gdrive:backups", // rclone remote }, - { - name: "cloud with local absolute path included", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "/mnt/cloud/backups", - }, - wantCount: 2, - wantPaths: []string{"/backup/local", "/mnt/cloud/backups"}, - wantLabel: []string{"Local backups", "Cloud backups"}, + wantCount: 2, + wantPaths: []string{"/backup/local", "gdrive:backups"}, + wantLabel: []string{"Local backups", "Cloud backups (rclone)"}, + }, + { + name: "cloud with local absolute path included", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: 
"/mnt/cloud/backups", }, + wantCount: 2, + wantPaths: []string{"/backup/local", "/mnt/cloud/backups"}, + wantLabel: []string{"Local backups", "Cloud backups"}, + }, { name: "secondary enabled but path empty", cfg: &config.Config{ @@ -135,17 +135,17 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths: []string{"/backup/local"}, wantLabel: []string{"Local backups"}, }, - { - name: "cloud absolute with colon allowed", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "/mnt/backups:foo", - }, - wantCount: 2, - wantPaths: []string{"/backup/local", "/mnt/backups:foo"}, - wantLabel: []string{"Local backups", "Cloud backups"}, + { + name: "cloud absolute with colon allowed", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: "/mnt/backups:foo", }, + wantCount: 2, + wantPaths: []string{"/backup/local", "/mnt/backups:foo"}, + wantLabel: []string{"Local backups", "Cloud backups"}, + }, { name: "all paths empty", cfg: &config.Config{}, @@ -2613,6 +2613,7 @@ func TestCopyRawArtifactsToWorkdir_NilContext(t *testing.T) { } // Pass nil context - function should use context.Background() + //lint:ignore SA1012 Intentional: verify nil ctx is treated as context.Background(). staged, err := copyRawArtifactsToWorkdirWithLogger(nil, cand, workDir, nil) if err != nil { t.Fatalf("copyRawArtifactsToWorkdirWithLogger error: %v", err) From 6ceb0e8e9fc53f79b3b3e869c691ca242aba00fb Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 02:19:50 +0100 Subject: [PATCH 07/24] Add context cancellation support, refactor switches Add context handling and cancellation checks across orchestrator operations: createBundle and decrypt TUI now accept/guard nil contexts and check ctx.Err(), io.Copy uses a context-aware reader (contextReader) to allow cancellation. Update tests to use context.TODO and rename one test accordingly. Refactor multiple if/else branches to switch statements for systemType in categories, compatibility and restore UI code, and adjust logging to include step index in logStep. Miscellaneous: reformat category literals, simplify an error check in restore_ha, adjust bundle test to use a switch on header names, and remove an unused renderEnvValue helper from config/upgrade.go. --- internal/config/upgrade.go | 14 -- internal/orchestrator/bundle_test.go | 7 +- internal/orchestrator/categories.go | 220 ++++++++++--------- internal/orchestrator/compatibility.go | 5 +- internal/orchestrator/decrypt_test.go | 6 +- internal/orchestrator/decrypt_tui.go | 9 + internal/orchestrator/orchestrator.go | 29 ++- internal/orchestrator/restore_ha.go | 2 +- internal/orchestrator/restore_workflow_ui.go | 5 +- internal/orchestrator/selective.go | 7 +- 10 files changed, 165 insertions(+), 139 deletions(-) diff --git a/internal/config/upgrade.go b/internal/config/upgrade.go index 755f12c..d319a4a 100644 --- a/internal/config/upgrade.go +++ b/internal/config/upgrade.go @@ -558,17 +558,3 @@ func findClosingQuoteLine(lines []string, start int) (int, error) { } return 0, fmt.Errorf("closing quote not found") } - -func renderEnvValue(key string, value envValue) []string { - if value.kind == envValueKindBlock { - lines := []string{fmt.Sprintf("%s=\"", key)} - lines = append(lines, value.blockLines...) 
- lines = append(lines, "\"") - return lines - } - line := fmt.Sprintf("%s=%s", key, value.rawValue) - if value.comment != "" { - line += " " + value.comment - } - return []string{line} -} diff --git a/internal/orchestrator/bundle_test.go b/internal/orchestrator/bundle_test.go index 6a9305c..9462b4b 100644 --- a/internal/orchestrator/bundle_test.go +++ b/internal/orchestrator/bundle_test.go @@ -90,11 +90,12 @@ func TestCreateBundle_CreatesValidTarArchive(t *testing.T) { } expectedContent := testData[""] - if header.Name == "backup.tar.sha256" { + switch header.Name { + case "backup.tar.sha256": expectedContent = testData[".sha256"] - } else if header.Name == "backup.tar.metadata" { + case "backup.tar.metadata": expectedContent = testData[".metadata"] - } else if header.Name == "backup.tar.metadata.sha256" { + case "backup.tar.metadata.sha256": expectedContent = testData[".metadata.sha256"] } diff --git a/internal/orchestrator/categories.go b/internal/orchestrator/categories.go index 4dda31e..56483d8 100644 --- a/internal/orchestrator/categories.go +++ b/internal/orchestrator/categories.go @@ -166,41 +166,41 @@ func GetAllCategories() []Category { }, ExportOnly: true, }, - { - ID: "pbs_host", - Name: "PBS Host & Integrations", - Description: "Node settings, ACME configuration, proxy, external metric servers and traffic control rules", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/node.cfg", - "./etc/proxmox-backup/proxy.cfg", - "./etc/proxmox-backup/acme/accounts.cfg", - "./etc/proxmox-backup/acme/plugins.cfg", - "./etc/proxmox-backup/metricserver.cfg", - "./etc/proxmox-backup/traffic-control.cfg", - "./var/lib/proxsave-info/commands/pbs/node_config.json", - "./var/lib/proxsave-info/commands/pbs/acme_accounts.json", - "./var/lib/proxsave-info/commands/pbs/acme_plugins.json", - "./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json", - "./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json", - "./var/lib/proxsave-info/commands/pbs/traffic_control.json", - }, + { + ID: "pbs_host", + Name: "PBS Host & Integrations", + Description: "Node settings, ACME configuration, proxy, external metric servers and traffic control rules", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/node.cfg", + "./etc/proxmox-backup/proxy.cfg", + "./etc/proxmox-backup/acme/accounts.cfg", + "./etc/proxmox-backup/acme/plugins.cfg", + "./etc/proxmox-backup/metricserver.cfg", + "./etc/proxmox-backup/traffic-control.cfg", + "./var/lib/proxsave-info/commands/pbs/node_config.json", + "./var/lib/proxsave-info/commands/pbs/acme_accounts.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugins.json", + "./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json", + "./var/lib/proxsave-info/commands/pbs/traffic_control.json", }, - { - ID: "datastore_pbs", - Name: "PBS Datastore Configuration", - Description: "Datastore definitions and settings (including S3 endpoint definitions)", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/datastore.cfg", - "./etc/proxmox-backup/s3.cfg", - "./var/lib/proxsave-info/commands/pbs/datastore_list.json", - "./var/lib/proxsave-info/commands/pbs/datastore_*_status.json", - "./var/lib/proxsave-info/commands/pbs/s3_endpoints.json", - "./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json", - "./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", - }, + }, + { + ID: "datastore_pbs", + Name: "PBS Datastore Configuration", + 
Description: "Datastore definitions and settings (including S3 endpoint definitions)", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/datastore.cfg", + "./etc/proxmox-backup/s3.cfg", + "./var/lib/proxsave-info/commands/pbs/datastore_list.json", + "./var/lib/proxsave-info/commands/pbs/datastore_*_status.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoints.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json", + "./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", }, + }, { ID: "maintenance_pbs", Name: "PBS Maintenance", @@ -210,79 +210,79 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/maintenance.cfg", }, }, - { - ID: "pbs_jobs", - Name: "PBS Jobs", - Description: "Sync, verify, and prune job configurations", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/sync.cfg", - "./etc/proxmox-backup/verification.cfg", - "./etc/proxmox-backup/prune.cfg", - "./var/lib/proxsave-info/commands/pbs/sync_jobs.json", - "./var/lib/proxsave-info/commands/pbs/verification_jobs.json", - "./var/lib/proxsave-info/commands/pbs/prune_jobs.json", - "./var/lib/proxsave-info/commands/pbs/gc_jobs.json", - }, + { + ID: "pbs_jobs", + Name: "PBS Jobs", + Description: "Sync, verify, and prune job configurations", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/sync.cfg", + "./etc/proxmox-backup/verification.cfg", + "./etc/proxmox-backup/prune.cfg", + "./var/lib/proxsave-info/commands/pbs/sync_jobs.json", + "./var/lib/proxsave-info/commands/pbs/verification_jobs.json", + "./var/lib/proxsave-info/commands/pbs/prune_jobs.json", + "./var/lib/proxsave-info/commands/pbs/gc_jobs.json", }, - { - ID: "pbs_remotes", - Name: "PBS Remotes", - Description: "Remote definitions for sync/verify jobs (may include credentials)", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/remote.cfg", - "./var/lib/proxsave-info/commands/pbs/remote_list.json", - }, + }, + { + ID: "pbs_remotes", + Name: "PBS Remotes", + Description: "Remote definitions for sync/verify jobs (may include credentials)", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/remote.cfg", + "./var/lib/proxsave-info/commands/pbs/remote_list.json", }, - { - ID: "pbs_notifications", - Name: "PBS Notifications", - Description: "Notification targets and matchers", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/notifications.cfg", - "./etc/proxmox-backup/notifications-priv.cfg", - "./var/lib/proxsave-info/commands/pbs/notification_targets.json", - "./var/lib/proxsave-info/commands/pbs/notification_matchers.json", - "./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json", - }, + }, + { + ID: "pbs_notifications", + Name: "PBS Notifications", + Description: "Notification targets and matchers", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/notifications.cfg", + "./etc/proxmox-backup/notifications-priv.cfg", + "./var/lib/proxsave-info/commands/pbs/notification_targets.json", + "./var/lib/proxsave-info/commands/pbs/notification_matchers.json", + "./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json", }, - { - ID: "pbs_access_control", - Name: "PBS Access Control", - Description: "Users, realms and permissions", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/user.cfg", - "./etc/proxmox-backup/domains.cfg", - "./etc/proxmox-backup/acl.cfg", - "./etc/proxmox-backup/token.cfg", - "./etc/proxmox-backup/shadow.json", - "./etc/proxmox-backup/token.shadow", 
- "./etc/proxmox-backup/tfa.json", - "./var/lib/proxsave-info/commands/pbs/user_list.json", - "./var/lib/proxsave-info/commands/pbs/realms_ldap.json", - "./var/lib/proxsave-info/commands/pbs/realms_ad.json", - "./var/lib/proxsave-info/commands/pbs/realms_openid.json", - "./var/lib/proxsave-info/commands/pbs/acl_list.json", - }, + }, + { + ID: "pbs_access_control", + Name: "PBS Access Control", + Description: "Users, realms and permissions", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/user.cfg", + "./etc/proxmox-backup/domains.cfg", + "./etc/proxmox-backup/acl.cfg", + "./etc/proxmox-backup/token.cfg", + "./etc/proxmox-backup/shadow.json", + "./etc/proxmox-backup/token.shadow", + "./etc/proxmox-backup/tfa.json", + "./var/lib/proxsave-info/commands/pbs/user_list.json", + "./var/lib/proxsave-info/commands/pbs/realms_ldap.json", + "./var/lib/proxsave-info/commands/pbs/realms_ad.json", + "./var/lib/proxsave-info/commands/pbs/realms_openid.json", + "./var/lib/proxsave-info/commands/pbs/acl_list.json", }, - { - ID: "pbs_tape", - Name: "PBS Tape Backup", - Description: "Tape jobs, pools, changers and tape encryption keys", - Type: CategoryTypePBS, - Paths: []string{ - "./etc/proxmox-backup/tape.cfg", - "./etc/proxmox-backup/tape-job.cfg", - "./etc/proxmox-backup/media-pool.cfg", - "./etc/proxmox-backup/tape-encryption-keys.json", - "./var/lib/proxsave-info/commands/pbs/tape_drives.json", - "./var/lib/proxsave-info/commands/pbs/tape_changers.json", - "./var/lib/proxsave-info/commands/pbs/tape_pools.json", - }, + }, + { + ID: "pbs_tape", + Name: "PBS Tape Backup", + Description: "Tape jobs, pools, changers and tape encryption keys", + Type: CategoryTypePBS, + Paths: []string{ + "./etc/proxmox-backup/tape.cfg", + "./etc/proxmox-backup/tape-job.cfg", + "./etc/proxmox-backup/media-pool.cfg", + "./etc/proxmox-backup/tape-encryption-keys.json", + "./var/lib/proxsave-info/commands/pbs/tape_drives.json", + "./var/lib/proxsave-info/commands/pbs/tape_changers.json", + "./var/lib/proxsave-info/commands/pbs/tape_pools.json", }, + }, // Common Categories { @@ -470,14 +470,17 @@ func GetCategoriesForSystem(systemType string) []Category { all := GetAllCategories() var categories []Category - for _, cat := range all { - if systemType == "pve" { - // PVE system: include PVE and common categories + switch systemType { + case "pve": + // PVE system: include PVE and common categories + for _, cat := range all { if cat.Type == CategoryTypePVE || cat.Type == CategoryTypeCommon { categories = append(categories, cat) } - } else if systemType == "pbs" { - // PBS system: include PBS and common categories + } + case "pbs": + // PBS system: include PBS and common categories + for _, cat := range all { if cat.Type == CategoryTypePBS || cat.Type == CategoryTypeCommon { categories = append(categories, cat) } @@ -566,14 +569,15 @@ func GetStorageModeCategories(systemType string) []Category { all := GetAllCategories() var categories []Category - if systemType == "pve" { + switch systemType { + case "pve": // PVE: cluster + storage + jobs + zfs + filesystem + storage stack for _, cat := range all { if cat.ID == "pve_cluster" || cat.ID == "storage_pve" || cat.ID == "pve_jobs" || cat.ID == "zfs" || cat.ID == "filesystem" || cat.ID == "storage_stack" { categories = append(categories, cat) } } - } else if systemType == "pbs" { + case "pbs": // PBS: config export + datastore + maintenance + jobs + remotes + zfs + filesystem + storage stack for _, cat := range all { if cat.ID == "pbs_config" || cat.ID == 
"datastore_pbs" || cat.ID == "maintenance_pbs" || cat.ID == "pbs_jobs" || cat.ID == "pbs_remotes" || cat.ID == "zfs" || cat.ID == "filesystem" || cat.ID == "storage_stack" { diff --git a/internal/orchestrator/compatibility.go b/internal/orchestrator/compatibility.go index 8e541cd..cc2eb3f 100644 --- a/internal/orchestrator/compatibility.go +++ b/internal/orchestrator/compatibility.go @@ -124,11 +124,12 @@ func GetSystemInfo() map[string]string { info["type_name"] = GetSystemTypeString(systemType) // Get version information - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if content, err := compatFS.ReadFile("/etc/pve-release"); err == nil { info["version"] = strings.TrimSpace(string(content)) } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if content, err := compatFS.ReadFile("/etc/proxmox-backup-release"); err == nil { info["version"] = strings.TrimSpace(string(content)) } diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index 19faeea..59be13c 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -2588,7 +2588,7 @@ func TestInspectRcloneMetadataManifest_RcloneFails(t *testing.T) { // copyRawArtifactsToWorkdirWithLogger coverage tests // ===================================== -func TestCopyRawArtifactsToWorkdir_NilContext(t *testing.T) { +func TestCopyRawArtifactsToWorkdir_ContextWorks(t *testing.T) { origFS := restoreFS restoreFS = osFS{} t.Cleanup(func() { restoreFS = origFS }) @@ -2612,9 +2612,7 @@ func TestCopyRawArtifactsToWorkdir_NilContext(t *testing.T) { RawChecksumPath: "", } - // Pass nil context - function should use context.Background() - //lint:ignore SA1012 Intentional: verify nil ctx is treated as context.Background(). - staged, err := copyRawArtifactsToWorkdirWithLogger(nil, cand, workDir, nil) + staged, err := copyRawArtifactsToWorkdirWithLogger(context.TODO(), cand, workDir, nil) if err != nil { t.Fatalf("copyRawArtifactsToWorkdirWithLogger error: %v", err) } diff --git a/internal/orchestrator/decrypt_tui.go b/internal/orchestrator/decrypt_tui.go index b0f23a8..655f7f5 100644 --- a/internal/orchestrator/decrypt_tui.go +++ b/internal/orchestrator/decrypt_tui.go @@ -340,12 +340,21 @@ func preparePlainBundleTUI(ctx context.Context, cand *decryptCandidate, version func decryptArchiveWithTUIPrompts(ctx context.Context, encryptedPath, outputPath, displayName, configPath, buildSig string, logger *logging.Logger) error { var promptError string + if ctx == nil { + ctx = context.Background() + } for { + if err := ctx.Err(); err != nil { + return err + } identities, err := promptDecryptIdentity(displayName, configPath, buildSig, promptError) if err != nil { return err } + if err := ctx.Err(); err != nil { + return err + } if err := decryptWithIdentity(encryptedPath, outputPath, identities...); err != nil { var noMatch *age.NoIdentityMatchError if errors.Is(err, age.ErrIncorrectIdentity) || errors.As(err, &noMatch) { diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index a0912d8..ccaa936 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -231,7 +231,7 @@ func (o *Orchestrator) logStep(step int, format string, args ...interface{}) { if len(args) > 0 { message = fmt.Sprintf(format, args...) } - o.logger.Step("%s", message) + o.logger.Step("[%d] %s", step, message) } // SetUpdateInfo records version update information discovered by the CLI layer. 
@@ -1054,6 +1054,13 @@ func (s *BackupStats) toPrometheusMetrics() *metrics.BackupMetrics { } func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bundlePath string, err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return "", err + } + logger := o.logger fs := o.filesystem() dir := filepath.Dir(archivePath) @@ -1073,6 +1080,9 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu } for _, file := range associated[:3] { + if err := ctx.Err(); err != nil { + return "", err + } if _, err := fs.Stat(filepath.Join(dir, file)); err != nil { return "", fmt.Errorf("associated file not found: %s: %w", file, err) } @@ -1093,6 +1103,9 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu // Add each associated file to the tar archive for _, filename := range associated { + if err := ctx.Err(); err != nil { + return "", err + } filePath := filepath.Join(dir, filename) // Get file info @@ -1119,7 +1132,7 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu return "", fmt.Errorf("failed to open %s: %w", filename, err) } - if _, err := io.Copy(tw, file); err != nil { + if _, err := io.Copy(tw, &contextReader{ctx: ctx, r: file}); err != nil { file.Close() return "", fmt.Errorf("failed to write %s to tar: %w", filename, err) } @@ -1144,6 +1157,18 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu return bundlePath, nil } +type contextReader struct { + ctx context.Context + r io.Reader +} + +func (cr *contextReader) Read(p []byte) (int, error) { + if err := cr.ctx.Err(); err != nil { + return 0, err + } + return cr.r.Read(p) +} + func (o *Orchestrator) removeAssociatedFiles(archivePath string) error { logger := o.logger fs := o.filesystem() diff --git a/internal/orchestrator/restore_ha.go b/internal/orchestrator/restore_ha.go index 5341941..d69db66 100644 --- a/internal/orchestrator/restore_ha.go +++ b/internal/orchestrator/restore_ha.go @@ -278,7 +278,7 @@ func stageHasPVEHAConfig(stageRoot string) (bool, error) { for _, candidate := range candidates { if _, err := restoreFS.Stat(candidate); err == nil { return true, nil - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return false, fmt.Errorf("stat %s: %w", candidate, err) } } diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index ce0bf9b..f90c1ad 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -778,13 +778,14 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Info("") logger.Info("IMPORTANT: You may need to restart services for changes to take effect.") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if needsClusterRestore && clusterServicesStopped { logger.Info(" PVE services were stopped/restarted during restore; verify status with: pvecm status") } else { logger.Info(" PVE services: systemctl restart pve-cluster pvedaemon pveproxy") } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if pbsServicesStopped { logger.Info(" PBS services were stopped/restarted during restore; verify status with: systemctl status proxmox-backup proxmox-backup-proxy") } else { diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index 18f7583..4b05ea2 100644 --- 
a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -158,11 +158,12 @@ func ShowRestoreModeMenuWithReader(ctx context.Context, reader *bufio.Reader, lo fmt.Println("Select restore mode:") fmt.Println(" [1] FULL restore - Restore everything from backup") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: fmt.Println(" [2] STORAGE only - PVE cluster + storage + jobs + mounts") - } else if systemType == SystemTypePBS { + case SystemTypePBS: fmt.Println(" [2] DATASTORE only - PBS datastore definitions + sync/verify/prune jobs + mounts") - } else { + default: fmt.Println(" [2] STORAGE/DATASTORE only - Storage or datastore configuration") } From 8ef91ab4eb79ffd8a4eca6a876b805d422f0c6ba Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 02:31:09 +0100 Subject: [PATCH 08/24] Refactor function signatures & simplify logging Remove logger coupling from several APIs and tidy up related logic. - Remove logger parameter from decryptArchiveWithSecretPrompt, buildNetworkPlanReport, targetNetworkEndpointFromConfig, applyPBSNodeCfgViaAPI, and validatePBSDatastoreReadOnly and update all callers. - Drop unused defaultNetworkHealthOptions and remove applyPBSHostConfigsFromStage (cleanup of dead/unused code). - Make guardMountPoint resilient to nil/expired contexts by ensuring ctx is non-nil and returning ctx.Err() early. - Adjust NewWithDeps to avoid copying the full Config into base (only set DryRun) to reduce unintended coupling. - Replace if/else with switch in RecreateDirectoriesFromConfig and ensure PBS datastore readonly warnings check logger != nil before logging. These changes simplify APIs by decoupling logging responsibilities, fix a nil-context issue, and remove some unused helpers to make call sites clearer. --- internal/orchestrator/decrypt.go | 6 ++--- internal/orchestrator/decrypt_workflow_ui.go | 4 +-- internal/orchestrator/deps.go | 1 - internal/orchestrator/directory_recreation.go | 11 ++++---- internal/orchestrator/mount_guard.go | 7 ++++++ .../orchestrator/network_apply_workflow_ui.go | 4 +-- internal/orchestrator/network_health.go | 11 -------- internal/orchestrator/network_plan.go | 8 +++--- internal/orchestrator/pbs_api_apply.go | 2 +- internal/orchestrator/pbs_staged_apply.go | 25 ++----------------- 10 files changed, 26 insertions(+), 53 deletions(-) diff --git a/internal/orchestrator/decrypt.go b/internal/orchestrator/decrypt.go index 92e9e6b..c995530 100644 --- a/internal/orchestrator/decrypt.go +++ b/internal/orchestrator/decrypt.go @@ -80,8 +80,8 @@ func RunDecryptWorkflowWithDeps(ctx context.Context, deps *Deps, version string) done := logging.DebugStart(logger, "decrypt workflow", "version=%s", version) defer func() { done(err) }() - ui := newCLIWorkflowUI(bufio.NewReader(os.Stdin), logger) - return runDecryptWorkflowWithUI(ctx, cfg, logger, version, ui) + ui := newCLIWorkflowUI(bufio.NewReader(os.Stdin), logger) + return runDecryptWorkflowWithUI(ctx, cfg, logger, version, ui) } // RunDecryptWorkflow is the legacy entrypoint that builds default deps. 
@@ -652,7 +652,7 @@ func copyRawArtifactsToWorkdirWithLogger(ctx context.Context, cand *decryptCandi func decryptArchiveWithPrompts(ctx context.Context, reader *bufio.Reader, encryptedPath, outputPath string, logger *logging.Logger) error { ui := newCLIWorkflowUI(reader, logger) displayName := filepath.Base(encryptedPath) - return decryptArchiveWithSecretPrompt(ctx, encryptedPath, outputPath, displayName, logger, ui.PromptDecryptSecret) + return decryptArchiveWithSecretPrompt(ctx, encryptedPath, outputPath, displayName, ui.PromptDecryptSecret) } func parseIdentityInput(input string) ([]age.Identity, error) { diff --git a/internal/orchestrator/decrypt_workflow_ui.go b/internal/orchestrator/decrypt_workflow_ui.go index 24d0377..a57d45a 100644 --- a/internal/orchestrator/decrypt_workflow_ui.go +++ b/internal/orchestrator/decrypt_workflow_ui.go @@ -137,7 +137,7 @@ func ensureWritablePathWithUI(ctx context.Context, ui DecryptWorkflowUI, targetP } } -func decryptArchiveWithSecretPrompt(ctx context.Context, encryptedPath, outputPath, displayName string, logger *logging.Logger, prompt func(ctx context.Context, displayName, previousError string) (string, error)) error { +func decryptArchiveWithSecretPrompt(ctx context.Context, encryptedPath, outputPath, displayName string, prompt func(ctx context.Context, displayName, previousError string) (string, error)) error { promptError := "" for { secret, err := prompt(ctx, displayName, promptError) @@ -246,7 +246,7 @@ func preparePlainBundleWithUI(ctx context.Context, cand *decryptCandidate, versi if strings.TrimSpace(displayName) == "" { displayName = filepath.Base(manifestCopy.ArchivePath) } - if err := decryptArchiveWithSecretPrompt(ctx, staged.ArchivePath, plainArchivePath, displayName, logger, ui.PromptDecryptSecret); err != nil { + if err := decryptArchiveWithSecretPrompt(ctx, staged.ArchivePath, plainArchivePath, displayName, ui.PromptDecryptSecret); err != nil { cleanup() return nil, err } diff --git a/internal/orchestrator/deps.go b/internal/orchestrator/deps.go index cb64194..648e20b 100644 --- a/internal/orchestrator/deps.go +++ b/internal/orchestrator/deps.go @@ -197,7 +197,6 @@ func NewWithDeps(deps Deps) *Orchestrator { base.Time = deps.Time } if deps.Config != nil { - base.Config = deps.Config base.DryRun = deps.Config.DryRun } diff --git a/internal/orchestrator/directory_recreation.go b/internal/orchestrator/directory_recreation.go index 19dc9f3..611aaff 100644 --- a/internal/orchestrator/directory_recreation.go +++ b/internal/orchestrator/directory_recreation.go @@ -294,7 +294,7 @@ func createPBSDatastoreStructure(basePath, datastoreName string, logger *logging // If the datastore already contains chunk/index data, avoid any modifications to prevent touching real backup data. // We only validate and report issues. 
if hasData { - if warn := validatePBSDatastoreReadOnly(basePath, logger); warn != "" { + if warn := validatePBSDatastoreReadOnly(basePath); warn != "" { logger.Warning("PBS datastore preflight: %s", warn) } logger.Info("PBS datastore preflight: datastore %s appears to contain data; skipping directory/permission changes to avoid risking datastore contents", datastoreName) @@ -371,7 +371,7 @@ func createPBSDatastoreStructure(basePath, datastoreName string, logger *logging return changed, nil } -func validatePBSDatastoreReadOnly(datastorePath string, logger *logging.Logger) string { +func validatePBSDatastoreReadOnly(datastorePath string) string { if datastorePath == "" { return "datastore path is empty" } @@ -843,15 +843,16 @@ func isIgnorableOwnershipError(err error) bool { func RecreateDirectoriesFromConfig(systemType SystemType, logger *logging.Logger) error { logger.Info("Recreating directory structures from configuration...") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if err := RecreateStorageDirectories(logger); err != nil { return fmt.Errorf("recreate PVE storage directories: %w", err) } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if err := RecreateDatastoreDirectories(logger); err != nil { return fmt.Errorf("recreate PBS datastore directories: %w", err) } - } else { + default: logger.Debug("Unknown system type, skipping directory recreation") } diff --git a/internal/orchestrator/mount_guard.go b/internal/orchestrator/mount_guard.go index bc352f9..037811d 100644 --- a/internal/orchestrator/mount_guard.go +++ b/internal/orchestrator/mount_guard.go @@ -220,6 +220,13 @@ func maybeApplyPBSDatastoreMountGuards(ctx context.Context, logger *logging.Logg } func guardMountPoint(ctx context.Context, guardTarget string) error { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + target := filepath.Clean(strings.TrimSpace(guardTarget)) if target == "" || target == "." 
|| target == string(os.PathSeparator) { return fmt.Errorf("invalid guard target: %q", guardTarget) diff --git a/internal/orchestrator/network_apply_workflow_ui.go b/internal/orchestrator/network_apply_workflow_ui.go index 14e931b..b150c89 100644 --- a/internal/orchestrator/network_apply_workflow_ui.go +++ b/internal/orchestrator/network_apply_workflow_ui.go @@ -267,7 +267,7 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l if strings.TrimSpace(iface) != "" { if cur, err := currentNetworkEndpoint(ctx, iface, 2*time.Second); err == nil { - if tgt, err := targetNetworkEndpointFromConfig(logger, iface); err == nil { + if tgt, err := targetNetworkEndpointFromConfig(iface); err == nil { logger.Info("Network plan: %s -> %s", cur.summary(), tgt.summary()) } } @@ -275,7 +275,7 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l if diagnosticsDir != "" { logging.DebugStep(logger, "network safe apply (ui)", "Write network plan (current -> target)") - if planText, err := buildNetworkPlanReport(ctx, logger, iface, source, 2*time.Second); err != nil { + if planText, err := buildNetworkPlanReport(ctx, iface, source, 2*time.Second); err != nil { logger.Debug("Network plan build failed: %v", err) } else if strings.TrimSpace(planText) != "" { if path, err := writeNetworkTextReportFile(diagnosticsDir, "plan.txt", planText+"\n"); err != nil { diff --git a/internal/orchestrator/network_health.go b/internal/orchestrator/network_health.go index 2c7faed..8b583f5 100644 --- a/internal/orchestrator/network_health.go +++ b/internal/orchestrator/network_health.go @@ -89,17 +89,6 @@ type networkHealthOptions struct { LocalPortChecks []tcpPortCheck } -func defaultNetworkHealthOptions() networkHealthOptions { - return networkHealthOptions{ - SystemType: SystemTypeUnknown, - Logger: nil, - CommandTimeout: 3 * time.Second, - EnableGatewayPing: true, - ForceSSHRouteCheck: false, - EnableDNSResolve: true, - } -} - type tcpPortCheck struct { Name string Address string diff --git a/internal/orchestrator/network_plan.go b/internal/orchestrator/network_plan.go index 7c07711..11c1eb9 100644 --- a/internal/orchestrator/network_plan.go +++ b/internal/orchestrator/network_plan.go @@ -7,8 +7,6 @@ import ( "sort" "strings" "time" - - "github.com/tis24dev/proxsave/internal/logging" ) type networkEndpoint struct { @@ -33,7 +31,7 @@ func (e networkEndpoint) summary() string { return fmt.Sprintf("iface=%s ip=%s gw=%s", iface, addrs, gw) } -func buildNetworkPlanReport(ctx context.Context, logger *logging.Logger, iface, source string, timeout time.Duration) (string, error) { +func buildNetworkPlanReport(ctx context.Context, iface, source string, timeout time.Duration) (string, error) { if strings.TrimSpace(iface) == "" { return fmt.Sprintf("Network plan\n\n- Management interface: n/a\n- Detection source: %s\n", strings.TrimSpace(source)), nil } @@ -42,7 +40,7 @@ func buildNetworkPlanReport(ctx context.Context, logger *logging.Logger, iface, } current, _ := currentNetworkEndpoint(ctx, iface, timeout) - target, _ := targetNetworkEndpointFromConfig(logger, iface) + target, _ := targetNetworkEndpointFromConfig(iface) var b strings.Builder b.WriteString("Network plan\n\n") @@ -77,7 +75,7 @@ func currentNetworkEndpoint(ctx context.Context, iface string, timeout time.Dura return ep, nil } -func targetNetworkEndpointFromConfig(logger *logging.Logger, iface string) (networkEndpoint, error) { +func targetNetworkEndpointFromConfig(iface string) (networkEndpoint, error) { ep := 
networkEndpoint{Interface: strings.TrimSpace(iface)} if ep.Interface == "" { return ep, fmt.Errorf("empty interface") diff --git a/internal/orchestrator/pbs_api_apply.go b/internal/orchestrator/pbs_api_apply.go index 00961d3..cee5c00 100644 --- a/internal/orchestrator/pbs_api_apply.go +++ b/internal/orchestrator/pbs_api_apply.go @@ -659,7 +659,7 @@ func applyPBSTrafficControlCfgViaAPI(ctx context.Context, logger *logging.Logger return nil } -func applyPBSNodeCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string) error { +func applyPBSNodeCfgViaAPI(ctx context.Context, stageRoot string) error { raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/node.cfg") if err != nil { return err diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index c469deb..be72037 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -80,7 +80,7 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") } } - if err := applyPBSNodeCfgViaAPI(ctx, logger, stageRoot); err != nil { + if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { logger.Warning("PBS API apply: node config failed: %v", err) if allowFileFallback { logger.Warning("PBS staged apply: falling back to file-based node.cfg") @@ -202,27 +202,6 @@ func applyPBSS3CfgFromStage(ctx context.Context, logger *logging.Logger, stageRo return applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/s3.cfg") } -func applyPBSHostConfigsFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { - done := logging.DebugStart(logger, "pbs staged apply host configs", "stage=%s", stageRoot) - defer func() { done(err) }() - - // ACME should be applied before node.cfg (node.cfg references ACME account/plugins). - paths := []string{ - "etc/proxmox-backup/acme/accounts.cfg", - "etc/proxmox-backup/acme/plugins.cfg", - "etc/proxmox-backup/metricserver.cfg", - "etc/proxmox-backup/traffic-control.cfg", - "etc/proxmox-backup/proxy.cfg", - "etc/proxmox-backup/node.cfg", - } - for _, rel := range paths { - if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { - logger.Warning("PBS staged apply: %s: %v", rel, err) - } - } - return nil -} - func applyPBSTapeConfigsFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { done := logging.DebugStart(logger, "pbs staged apply tape configs", "stage=%s", stageRoot) defer func() { done(err) }() @@ -521,7 +500,7 @@ func shouldApplyPBSDatastoreBlock(block pbsDatastoreBlock, logger *logging.Logge } if hasData { - if warn := validatePBSDatastoreReadOnly(path, logger); warn != "" { + if warn := validatePBSDatastoreReadOnly(path); warn != "" && logger != nil { logger.Warning("PBS datastore preflight: %s", warn) } return true, "" From 40fe249c082ddde859f21b2f1197bf8628219c9f Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 12 Feb 2026 02:54:50 +0100 Subject: [PATCH 09/24] Improve PBS list parsing and notification cleanup parsePBSListIDs: sanitize and validate candidate keys, error when none provided, and make row parsing stricter and more informative. The function now trims empty keys up-front, iterates using the cleaned key list, ensures values are non-empty strings, and returns a descriptive error (including row index and available keys) instead of silently skipping unparseable rows. 
applyPBSNotificationsViaAPI: move matcher cleanup earlier in strict mode so matchers are removed before endpoints (preventing endpoint cleanup from being blocked by references). The matcher handling was consolidated (deduplicated) and matcher names are sorted for deterministic processing. --- internal/orchestrator/pbs_api_apply.go | 45 +++++++----- .../pbs_notifications_api_apply.go | 71 ++++++++++--------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/internal/orchestrator/pbs_api_apply.go b/internal/orchestrator/pbs_api_apply.go index cee5c00..830bcc4 100644 --- a/internal/orchestrator/pbs_api_apply.go +++ b/internal/orchestrator/pbs_api_apply.go @@ -115,6 +115,18 @@ func parsePBSListIDs(raw []byte, candidateKeys ...string) ([]string, error) { return nil, nil } + keys := make([]string, 0, len(candidateKeys)) + for _, k := range candidateKeys { + k = strings.TrimSpace(k) + if k == "" { + continue + } + keys = append(keys, k) + } + if len(keys) == 0 { + return nil, fmt.Errorf("no candidate keys provided for PBS list ID parsing") + } + var rows []map[string]any if err := json.Unmarshal(data, &rows); err != nil { return nil, err @@ -122,30 +134,29 @@ func parsePBSListIDs(raw []byte, candidateKeys ...string) ([]string, error) { out := make([]string, 0, len(rows)) seen := make(map[string]struct{}, len(rows)) - for _, row := range rows { + for idx, row := range rows { id := "" - for _, k := range candidateKeys { - k = strings.TrimSpace(k) - if k == "" { + for _, k := range keys { + v, ok := row[k] + if !ok || v == nil { continue } - if v, ok := row[k]; ok { - if s, ok := v.(string); ok { - id = strings.TrimSpace(s) - break - } + s, ok := v.(string) + if !ok { + continue } - } - if id == "" { - for _, v := range row { - if s, ok := v.(string); ok { - id = strings.TrimSpace(s) - break - } + id = strings.TrimSpace(s) + if id != "" { + break } } if id == "" { - continue + available := make([]string, 0, len(row)) + for k := range row { + available = append(available, k) + } + sort.Strings(available) + return nil, fmt.Errorf("failed to parse PBS list row %d: none of %v present as non-empty string (available keys: %v)", idx, keys, available) } if _, ok := seen[id]; ok { continue diff --git a/internal/orchestrator/pbs_notifications_api_apply.go b/internal/orchestrator/pbs_notifications_api_apply.go index 122d659..032debb 100644 --- a/internal/orchestrator/pbs_notifications_api_apply.go +++ b/internal/orchestrator/pbs_notifications_api_apply.go @@ -135,6 +135,42 @@ func applyPBSNotificationsViaAPI(ctx context.Context, logger *logging.Logger, st } } + // In strict mode, remove matchers first so endpoint cleanup isn't blocked by references. 
+ desiredMatchers := make(map[string]proxmoxNotificationSection, len(matchers)) + for _, m := range matchers { + name := strings.TrimSpace(m.Name) + if name == "" { + continue + } + desiredMatchers[name] = m + } + + matcherNames := make([]string, 0, len(desiredMatchers)) + for name := range desiredMatchers { + matcherNames = append(matcherNames, name) + } + sort.Strings(matcherNames) + + if strict { + out, err := runPBSManager(ctx, "notification", "matcher", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse matcher list: %w", err) + } + for _, name := range current { + if _, ok := desiredMatchers[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "notification", "matcher", "remove", name); err != nil { + // Built-in matchers may not be removable; keep going. + logger.Warning("PBS notifications API apply: matcher remove %s failed (continuing): %v", name, err) + } + } + } + // Endpoints first (matchers refer to targets/endpoints). for _, typ := range []string{"smtp", "sendmail", "gotify", "webhook"} { desiredNames := make(map[string]endpointSection) @@ -191,41 +227,6 @@ func applyPBSNotificationsViaAPI(ctx context.Context, logger *logging.Logger, st } // Then matchers. - desiredMatchers := make(map[string]proxmoxNotificationSection, len(matchers)) - for _, m := range matchers { - name := strings.TrimSpace(m.Name) - if name == "" { - continue - } - desiredMatchers[name] = m - } - - matcherNames := make([]string, 0, len(desiredMatchers)) - for name := range desiredMatchers { - matcherNames = append(matcherNames, name) - } - sort.Strings(matcherNames) - - if strict { - out, err := runPBSManager(ctx, "notification", "matcher", "list", "--output-format=json") - if err != nil { - return err - } - current, err := parsePBSListIDs(out, "name", "id") - if err != nil { - return fmt.Errorf("parse matcher list: %w", err) - } - for _, name := range current { - if _, ok := desiredMatchers[name]; ok { - continue - } - if _, err := runPBSManager(ctx, "notification", "matcher", "remove", name); err != nil { - // Built-in matchers may not be removable; keep going. - logger.Warning("PBS notifications API apply: matcher remove %s failed (continuing): %v", name, err) - } - } - } - for _, name := range matcherNames { m := desiredMatchers[name] flags := buildProxmoxManagerFlags(m.Entries) From 3c96a9bfc0dc74b963a95c8559b311b90b64bd59 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Sun, 15 Feb 2026 23:27:28 +0100 Subject: [PATCH 10/24] Add chunking, reassembly and selective restore support Implement smart file chunking and reassembly with metadata and tests. Introduces chunkedFileMetadata, splitFile returning chunk/SHA256 info, robust writeChunk logic, and marker (.chunked) handling in chunkLargeFiles; adds ReassembleChunkedFiles, discoverChunks (numeric sort), concatenateChunks, validation and metadata application (chmod/chown/mtime). Enhance normalization/minification helpers (safe config normalization, json.Compact-based minify) and add extensive tests covering chunking, reassembly, JSON/minify behavior and config normalization. Update orchestrator to map chunk artifacts back to original paths (originalPathFromChunk), improve path matching for selective restores, and trigger reassembly after extract; also fix a decrypt test script argument. 
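Illustrative sketch (reviewer note, not part of the diff below): the "numeric sort" behaviour that discoverChunks appears to provide, and that TestDiscoverChunksSortsNumerically exercises, amounts to ordering `<name>.NNN.chunk` files by the parsed integer index rather than lexically, so indexes past 999 stay in sequence. The chunkRef type and parseChunkIndex helper here are assumed names for illustration only, not the identifiers used in internal/backup/optimizations.go.

// Hypothetical sketch of numeric chunk ordering; helper names are illustrative.
package main

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)

type chunkRef struct {
	Path  string
	Index int
}

// parseChunkIndex extracts N from "<base>.<N>.chunk" and reports whether the
// name follows that pattern; non-positive or non-numeric suffixes are rejected.
func parseChunkIndex(name string) (int, bool) {
	const suffix = ".chunk"
	if !strings.HasSuffix(name, suffix) {
		return 0, false
	}
	rest := strings.TrimSuffix(name, suffix)
	dot := strings.LastIndex(rest, ".")
	if dot < 0 {
		return 0, false
	}
	idx, err := strconv.Atoi(rest[dot+1:])
	if err != nil || idx <= 0 {
		return 0, false
	}
	return idx, true
}

func main() {
	names := []string{"big.bin.010.chunk", "big.bin.1000.chunk", "big.bin.002.chunk", "big.bin.001.chunk"}
	var chunks []chunkRef
	for _, n := range names {
		if idx, ok := parseChunkIndex(n); ok {
			chunks = append(chunks, chunkRef{Path: n, Index: idx})
		}
	}
	// Sort by the parsed integer index, so 1000 orders after 2 instead of before it.
	sort.Slice(chunks, func(i, j int) bool { return chunks[i].Index < chunks[j].Index })
	for _, c := range chunks {
		fmt.Println(c.Index, c.Path) // prints 1, 2, 10, 1000 in order
	}
}
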
--- internal/backup/chunking_hardening_test.go | 192 ++++++ internal/backup/optimizations.go | 549 +++++++++++++++++- internal/backup/optimizations_helpers_test.go | 241 +++++++- internal/orchestrator/chunking_paths.go | 28 + internal/orchestrator/decrypt_test.go | 3 +- internal/orchestrator/restore.go | 38 +- .../restore_chunking_selective_test.go | 112 ++++ internal/orchestrator/selective.go | 71 ++- internal/orchestrator/selective_pure_test.go | 5 +- 9 files changed, 1182 insertions(+), 57 deletions(-) create mode 100644 internal/backup/chunking_hardening_test.go create mode 100644 internal/orchestrator/chunking_paths.go create mode 100644 internal/orchestrator/restore_chunking_selective_test.go diff --git a/internal/backup/chunking_hardening_test.go b/internal/backup/chunking_hardening_test.go new file mode 100644 index 0000000..1f0a33c --- /dev/null +++ b/internal/backup/chunking_hardening_test.go @@ -0,0 +1,192 @@ +package backup + +import ( + "bytes" + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestDiscoverChunksSortsNumerically(t *testing.T) { + root := t.TempDir() + base := filepath.Join(root, "chunked_files", "big.bin") + if err := os.MkdirAll(filepath.Dir(base), 0o755); err != nil { + t.Fatal(err) + } + + // Create chunk files in mixed order (including >999) to ensure numeric sort. + chunkPaths := []string{ + filepath.Join(filepath.Dir(base), "big.bin.010.chunk"), + filepath.Join(filepath.Dir(base), "big.bin.001.chunk"), + filepath.Join(filepath.Dir(base), "big.bin.1000.chunk"), + filepath.Join(filepath.Dir(base), "big.bin.002.chunk"), + filepath.Join(filepath.Dir(base), "big.bin.999.chunk"), + filepath.Join(filepath.Dir(base), "big.bin.003.chunk"), + } + for _, p := range chunkPaths { + if err := os.WriteFile(p, []byte("x"), 0o640); err != nil { + t.Fatalf("write %s: %v", p, err) + } + } + + chunks, err := discoverChunks(base) + if err != nil { + t.Fatalf("discoverChunks: %v", err) + } + + got := make([]int, 0, len(chunks)) + for _, c := range chunks { + got = append(got, c.Index) + } + want := []int{1, 2, 3, 10, 999, 1000} + if len(got) != len(want) { + t.Fatalf("got %d chunks, want %d (%v)", len(got), len(want), got) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("numeric sort mismatch: got %v want %v", got, want) + } + } +} + +func TestReassembleChunkedFiles_SkipsWhenLastChunkMissing(t *testing.T) { + root := t.TempDir() + logger := logging.New(types.LogLevelError, false) + + originalPath := filepath.Join(root, "file.bin") + markerPath := originalPath + ".chunked" + chunkDir := filepath.Join(root, "chunked_files") + if err := os.MkdirAll(chunkDir, 0o755); err != nil { + t.Fatal(err) + } + + meta := chunkedFileMetadata{ + Version: 1, + SizeBytes: 9, + ChunkSizeBytes: 4, + ChunkCount: 3, // expects 3 chunks + Mode: 0o640, + UID: -1, + GID: -1, + ModTimeUnixNano: time.Now().UnixNano(), + } + payload, _ := json.Marshal(meta) + if err := os.WriteFile(markerPath, payload, 0o640); err != nil { + t.Fatal(err) + } + + // Only two chunks present -> should not reassemble. 
+ if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.001.chunk"), []byte("abcd"), 0o640); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.002.chunk"), []byte("efgh"), 0o640); err != nil { + t.Fatal(err) + } + + if err := ReassembleChunkedFiles(logger, root); err != nil { + t.Fatalf("ReassembleChunkedFiles: %v", err) + } + + if _, err := os.Stat(originalPath); !os.IsNotExist(err) { + t.Fatalf("expected original not to be created, stat err=%v", err) + } + if _, err := os.Stat(markerPath); err != nil { + t.Fatalf("expected marker to remain, stat err=%v", err) + } +} + +func TestReassembleChunkedFiles_SkipsWhenSHA256Mismatch(t *testing.T) { + root := t.TempDir() + logger := logging.New(types.LogLevelError, false) + + originalPath := filepath.Join(root, "file.bin") + markerPath := originalPath + ".chunked" + chunkDir := filepath.Join(root, "chunked_files") + if err := os.MkdirAll(chunkDir, 0o755); err != nil { + t.Fatal(err) + } + + data := []byte("hello world") + meta := chunkedFileMetadata{ + Version: 1, + SizeBytes: int64(len(data)), + ChunkSizeBytes: 8, + ChunkCount: 2, + SHA256: "0000000000000000000000000000000000000000000000000000000000000000", // wrong + Mode: 0o640, + UID: -1, + GID: -1, + ModTimeUnixNano: time.Now().UnixNano(), + } + payload, _ := json.Marshal(meta) + if err := os.WriteFile(markerPath, payload, 0o640); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.001.chunk"), data[:8], 0o640); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.002.chunk"), data[8:], 0o640); err != nil { + t.Fatal(err) + } + + if err := ReassembleChunkedFiles(logger, root); err != nil { + t.Fatalf("ReassembleChunkedFiles: %v", err) + } + + if _, err := os.Stat(originalPath); !os.IsNotExist(err) { + t.Fatalf("expected original not to be created, stat err=%v", err) + } + if _, err := os.Stat(markerPath); err != nil { + t.Fatalf("expected marker to remain, stat err=%v", err) + } +} + +func TestChunkAndReassemble_PreservesModeAndMtime(t *testing.T) { + root := t.TempDir() + logger := logging.New(types.LogLevelError, false) + + original := bytes.Repeat([]byte("ABCDEFGHIJKLMNOP"), 6) // 96 bytes + target := filepath.Join(root, "subdir", "large.bin") + if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(target, original, 0o600); err != nil { + t.Fatal(err) + } + mt := time.Unix(1700000000, 123456789) + if err := os.Chtimes(target, mt, mt); err != nil { + t.Fatal(err) + } + + if err := chunkLargeFiles(context.Background(), logger, root, 16, 64); err != nil { + t.Fatalf("chunkLargeFiles: %v", err) + } + if err := ReassembleChunkedFiles(logger, root); err != nil { + t.Fatalf("ReassembleChunkedFiles: %v", err) + } + + got, err := os.ReadFile(target) + if err != nil { + t.Fatalf("read reassembled: %v", err) + } + if !bytes.Equal(got, original) { + t.Fatalf("content mismatch: got %d bytes want %d bytes", len(got), len(original)) + } + + info, err := os.Stat(target) + if err != nil { + t.Fatalf("stat reassembled: %v", err) + } + if info.Mode().Perm() != 0o600 { + t.Fatalf("mode mismatch: got %o want %o", info.Mode().Perm(), 0o600) + } + if info.ModTime().UnixNano() != mt.UnixNano() { + t.Fatalf("mtime mismatch: got %d want %d", info.ModTime().UnixNano(), mt.UnixNano()) + } +} diff --git a/internal/backup/optimizations.go b/internal/backup/optimizations.go index f31943f..5797495 100644 --- a/internal/backup/optimizations.go +++ 
b/internal/backup/optimizations.go @@ -6,10 +6,15 @@ import ( "crypto/sha256" "encoding/json" "fmt" + "hash" "io" "os" "path/filepath" + "sort" + "strconv" "strings" + "syscall" + "time" "github.com/tis24dev/proxsave/internal/logging" ) @@ -23,6 +28,18 @@ const ( defaultChunkFilePerm = 0o640 ) +type chunkedFileMetadata struct { + Version int `json:"version"` + SizeBytes int64 `json:"size_bytes"` + ChunkSizeBytes int64 `json:"chunk_size_bytes"` + ChunkCount int `json:"chunk_count"` + SHA256 string `json:"sha256,omitempty"` + Mode uint32 `json:"mode"` + UID int `json:"uid"` + GID int `json:"gid"` + ModTimeUnixNano int64 `json:"mod_time_unix_nano"` +} + // OptimizationConfig controls optional preprocessing steps executed before archiving. type OptimizationConfig struct { EnableChunking bool @@ -203,6 +220,9 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c } return nil } + if d.Type()&os.ModeSymlink != 0 { + return nil + } if strings.HasPrefix(path, chunkDir) { return nil } @@ -220,15 +240,61 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c return nil } destBase := filepath.Join(chunkDir, rel) - if err := splitFile(path, destBase, chunkSize); err != nil { + + meta := chunkedFileMetadata{ + Version: 1, + ChunkSizeBytes: chunkSize, + Mode: uint32(info.Mode()), + UID: -1, + GID: -1, + ModTimeUnixNano: info.ModTime().UnixNano(), + } + if stat, ok := info.Sys().(*syscall.Stat_t); ok && stat != nil { + meta.UID = int(stat.Uid) + meta.GID = int(stat.Gid) + } + + result, err := splitFile(path, destBase, chunkSize) + if err != nil { logger.Warning("Failed to chunk %s: %v", path, err) return nil } + meta.SizeBytes = result.SizeBytes + meta.ChunkCount = result.ChunkCount + meta.SHA256 = result.SHA256 + + cleanupChunks := func() { + chunks, err := discoverChunks(destBase) + if err != nil { + return + } + for _, c := range chunks { + _ = os.Remove(c.Path) + } + } + + markerPath := path + ".chunked" + payload, err := json.Marshal(meta) + if err != nil { + logger.Warning("Failed to encode chunk metadata for %s: %v", path, err) + cleanupChunks() + return nil + } + if err := os.WriteFile(markerPath, append(payload, '\n'), defaultChunkFilePerm); err != nil { + logger.Warning("Failed to write chunk marker for %s: %v", path, err) + _ = os.Remove(markerPath) + cleanupChunks() + return nil + } + // Best-effort: preserve the original file's mtime on the marker too. 
+ mt := time.Unix(0, meta.ModTimeUnixNano) + _ = os.Chtimes(markerPath, mt, mt) if err := os.Remove(path); err != nil { logger.Warning("Failed to remove original file %s after chunking: %v", path, err) - } else if err := os.WriteFile(path+".chunked", []byte{}, defaultChunkFilePerm); err != nil { - logger.Warning("Failed to write chunk marker for %s: %v", path, err) + _ = os.Remove(markerPath) + cleanupChunks() + return nil } processed++ logger.Debug("Chunked %s into %s", path, destBase) @@ -243,64 +309,114 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c return nil } -func splitFile(path, destBase string, chunkSize int64) error { +type splitFileResult struct { + ChunkCount int + SizeBytes int64 + SHA256 string +} + +func splitFile(path, destBase string, chunkSize int64) (splitFileResult, error) { if err := os.MkdirAll(filepath.Dir(destBase), defaultChunkDirPerm); err != nil { - return err + return splitFileResult{}, err } in, err := os.Open(path) if err != nil { - return err + return splitFileResult{}, err } defer in.Close() buf := make([]byte, chunkBufferSize) - index := 0 + hasher := sha256.New() + var createdChunks []string + cleanup := func() { + for _, p := range createdChunks { + _ = os.Remove(p) + } + } + chunkCount := 0 + var total int64 for { - index++ - chunkPath := fmt.Sprintf("%s.%03d.chunk", destBase, index) - done, err := writeChunk(in, chunkPath, buf, chunkSize) + chunkPath := fmt.Sprintf("%s.%03d.chunk", destBase, chunkCount+1) + done, n, err := writeChunk(in, chunkPath, buf, chunkSize, hasher) if err != nil { - return err + cleanup() + return splitFileResult{}, err + } + if n > 0 { + createdChunks = append(createdChunks, chunkPath) + total += n + chunkCount++ } if done { break } } - return nil + if chunkCount == 0 { + return splitFileResult{}, fmt.Errorf("chunking produced no output for %s", path) + } + return splitFileResult{ + ChunkCount: chunkCount, + SizeBytes: total, + SHA256: fmt.Sprintf("%x", hasher.Sum(nil)), + }, nil } -func writeChunk(src *os.File, chunkPath string, buf []byte, limit int64) (bool, error) { +func writeChunk(src *os.File, chunkPath string, buf []byte, limit int64, hasher hash.Hash) (bool, int64, error) { out, err := os.OpenFile(chunkPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, defaultChunkFilePerm) if err != nil { - return false, err + return false, 0, err } defer out.Close() var written int64 for written < limit { remaining := limit - written - if remaining < int64(len(buf)) { - buf = buf[:remaining] + toRead := buf + if remaining < int64(len(toRead)) { + toRead = toRead[:remaining] } - n, err := src.Read(buf) + n, err := src.Read(toRead) if n > 0 { - if _, wErr := out.Write(buf[:n]); wErr != nil { - return false, wErr + if _, wErr := out.Write(toRead[:n]); wErr != nil { + return false, written, wErr + } + if hasher != nil { + if _, hErr := hasher.Write(toRead[:n]); hErr != nil { + return false, written, hErr + } } written += int64(n) } if err != nil { if err == io.EOF { - return true, nil + if written == 0 { + _ = out.Close() + _ = os.Remove(chunkPath) + } + return true, written, nil } - return false, err + return false, written, err } if written >= limit { - return false, nil + var probe [1]byte + n, pErr := src.Read(probe[:]) + if n > 0 { + if _, sErr := src.Seek(-int64(n), io.SeekCurrent); sErr != nil { + return false, written, fmt.Errorf("seek after probe: %w", sErr) + } + return false, written, nil + } + if pErr == io.EOF { + return true, written, nil + } + if pErr != nil { + return false, written, pErr + } + 
return false, written, fmt.Errorf("unexpected empty read while probing for EOF") } } - return false, nil + return false, written, nil } func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, maxSize int64) error { @@ -411,8 +527,37 @@ func normalizeTextFile(path string) (bool, error) { func normalizeConfigFile(path string) (bool, error) { // Config files can be whitespace/ordering-sensitive (e.g. section headers). - // Only perform safe, semantic-preserving normalization here. - return normalizeTextFile(path) + // Only perform safe, semantic-preserving normalization: + // 1. Strip UTF-8 BOM + // 2. Normalize CRLF → LF, stray CR → LF + // 3. Strip trailing whitespace from each line + // 4. Consolidate trailing newlines to exactly one + // Line order, leading indentation, comments, and blank lines between + // sections are preserved. + data, err := os.ReadFile(path) + if err != nil { + return false, err + } + if len(data) == 0 { + return false, nil + } + + normalized := bytes.TrimPrefix(data, []byte("\xef\xbb\xbf")) + normalized = bytes.ReplaceAll(normalized, []byte("\r\n"), []byte("\n")) + normalized = bytes.ReplaceAll(normalized, []byte("\r"), []byte("\n")) + + lines := bytes.Split(normalized, []byte("\n")) + for i, line := range lines { + lines[i] = bytes.TrimRight(line, " \t") + } + normalized = bytes.Join(lines, []byte("\n")) + + normalized = append(bytes.TrimRight(normalized, "\n"), '\n') + + if bytes.Equal(data, normalized) { + return false, nil + } + return true, os.WriteFile(path, normalized, defaultChunkFilePerm) } func minifyJSON(path string) (bool, error) { @@ -420,16 +565,356 @@ func minifyJSON(path string) (bool, error) { if err != nil { return false, err } - var tmp any - if err := json.Unmarshal(data, &tmp); err != nil { + var buf bytes.Buffer + if err := json.Compact(&buf, data); err != nil { return false, err } - minified, err := json.Marshal(tmp) + compacted := buf.Bytes() + if bytes.Equal(data, compacted) { + return false, nil + } + return true, os.WriteFile(path, compacted, defaultChunkFilePerm) +} + +// ReassembleChunkedFiles locates .chunked marker files under root, +// concatenates the matching .NNN.chunk fragments from the chunked_files +// directory, writes the reassembled file, and cleans up markers and chunks. 
+func ReassembleChunkedFiles(logger *logging.Logger, root string) error { + chunkDir := filepath.Join(root, "chunked_files") + if _, err := os.Stat(chunkDir); os.IsNotExist(err) { + return nil + } + + var markers []string + err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if d.IsDir() { + if path == chunkDir { + return filepath.SkipDir + } + return nil + } + if strings.HasSuffix(path, ".chunked") { + markers = append(markers, path) + } + return nil + }) if err != nil { - return false, err + return fmt.Errorf("walk for chunk markers: %w", err) } - if bytes.Equal(bytes.TrimSpace(data), minified) { - return false, nil + + if len(markers) == 0 { + return nil + } + + var reassembled int + var incomplete bool + + for _, marker := range markers { + originalPath := strings.TrimSuffix(marker, ".chunked") + rel, err := filepath.Rel(root, originalPath) + if err != nil { + logger.Warning("Failed to compute rel path for %s: %v", originalPath, err) + incomplete = true + continue + } + chunkBase := filepath.Join(chunkDir, rel) + + chunks, err := discoverChunks(chunkBase) + if err != nil || len(chunks) == 0 { + logger.Warning("No chunks found for %s (base=%s): %v", rel, chunkBase, err) + incomplete = true + continue + } + + meta, err := readChunkedFileMetadata(marker) + if err != nil { + logger.Warning("Chunk marker metadata unreadable for %s: %v", rel, err) + incomplete = true + continue + } + + ambiguous, err := validateChunkSet(meta, chunks) + if err != nil { + logger.Warning("Chunk set incomplete for %s: %v", rel, err) + incomplete = true + continue + } + if meta == nil && ambiguous { + logger.Warning("Legacy chunk marker without metadata for %s; reassembly cannot fully verify completeness", rel) + } + + if err := concatenateChunks(originalPath, chunks, meta); err != nil { + logger.Warning("Failed to reassemble %s: %v", rel, err) + incomplete = true + continue + } + if meta != nil { + applyChunkedMetadata(logger, originalPath, meta) + } + + _ = os.Remove(marker) + for _, c := range chunks { + _ = os.Remove(c.Path) + } + logger.Debug("Reassembled %s from %d chunks", rel, len(chunks)) + reassembled++ + } + + // Remove chunked_files dir tree if now empty. + if reassembled > 0 && !incomplete { + removeEmptyDirs(chunkDir) + _ = os.Remove(chunkDir) + } + + return nil +} + +type chunkInfo struct { + Index int + Path string +} + +// discoverChunks returns the numerically-sorted list of chunks for a base. +// Chunks are named ..chunk where is a positive integer. +func discoverChunks(base string) ([]chunkInfo, error) { + dir := filepath.Dir(base) + prefix := filepath.Base(base) + "." 
+ + entries, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + + var chunks []chunkInfo + for _, e := range entries { + name := e.Name() + if !strings.HasPrefix(name, prefix) || !strings.HasSuffix(name, ".chunk") { + continue + } + idxStr := strings.TrimSuffix(strings.TrimPrefix(name, prefix), ".chunk") + idx, err := strconv.Atoi(idxStr) + if err != nil || idx <= 0 { + continue + } + chunks = append(chunks, chunkInfo{Index: idx, Path: filepath.Join(dir, name)}) + } + + sort.Slice(chunks, func(i, j int) bool { return chunks[i].Index < chunks[j].Index }) + return chunks, nil +} + +func concatenateChunks(dest string, chunks []chunkInfo, meta *chunkedFileMetadata) error { + if err := os.MkdirAll(filepath.Dir(dest), defaultChunkDirPerm); err != nil { + return err + } + + tmpDir := filepath.Dir(dest) + tmp, err := os.CreateTemp(tmpDir, "."+filepath.Base(dest)+".reassemble-*.tmp") + if err != nil { + return err + } + tmpPath := tmp.Name() + defer func() { + if tmpPath != "" { + _ = os.Remove(tmpPath) + } + }() + + if err := os.Chmod(tmpPath, defaultChunkFilePerm); err != nil { + tmp.Close() + return err + } + + buf := make([]byte, chunkBufferSize) + + var hasher hash.Hash + if meta != nil && meta.SHA256 != "" { + hasher = sha256.New() + } + + var written int64 + for _, chunk := range chunks { + in, err := os.Open(chunk.Path) + if err != nil { + tmp.Close() + return err + } + var dst io.Writer = tmp + if hasher != nil { + dst = io.MultiWriter(tmp, hasher) + } + n, err := io.CopyBuffer(dst, in, buf) + if cErr := in.Close(); cErr != nil && err == nil { + err = cErr + } + if err != nil { + tmp.Close() + return err + } + written += n + } + + if err := tmp.Close(); err != nil { + return err + } + + if meta != nil { + if meta.SizeBytes > 0 && written != meta.SizeBytes { + return fmt.Errorf("size mismatch after reassembly: got %d bytes, expected %d", written, meta.SizeBytes) + } + if hasher != nil { + got := fmt.Sprintf("%x", hasher.Sum(nil)) + if got != meta.SHA256 { + return fmt.Errorf("sha256 mismatch after reassembly") + } + } + } + + if err := os.Rename(tmpPath, dest); err != nil { + return err + } + tmpPath = "" + return nil +} + +func removeEmptyDirs(root string) { + var dirs []string + filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() && path != root { + dirs = append(dirs, path) + } + return nil + }) + for i := len(dirs) - 1; i >= 0; i-- { + os.Remove(dirs[i]) + } +} + +func readChunkedFileMetadata(markerPath string) (*chunkedFileMetadata, error) { + data, err := os.ReadFile(markerPath) + if err != nil { + return nil, err + } + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, nil + } + + var meta chunkedFileMetadata + if err := json.Unmarshal(data, &meta); err != nil { + return nil, err + } + if meta.Version != 1 { + return nil, fmt.Errorf("unsupported chunk metadata version %d", meta.Version) + } + if meta.ChunkCount <= 0 || meta.ChunkSizeBytes <= 0 || meta.SizeBytes <= 0 { + return nil, fmt.Errorf("invalid chunk metadata (count=%d chunkSize=%d size=%d)", meta.ChunkCount, meta.ChunkSizeBytes, meta.SizeBytes) + } + return &meta, nil +} + +func validateChunkSet(meta *chunkedFileMetadata, chunks []chunkInfo) (bool, error) { + if len(chunks) == 0 { + return false, fmt.Errorf("no chunk files present") + } + + for i, c := range chunks { + want := i + 1 + if c.Index != want { + return false, fmt.Errorf("missing or out-of-order chunk: expected index %d, got %d", want, c.Index) + } + } + + if meta 
== nil { + // Legacy (empty marker): best-effort structural validation. + var chunkSize int64 + sizes := make([]int64, len(chunks)) + for i, c := range chunks { + info, err := os.Stat(c.Path) + if err != nil { + return false, fmt.Errorf("stat chunk %s: %w", c.Path, err) + } + if !info.Mode().IsRegular() { + return false, fmt.Errorf("chunk is not a regular file: %s", c.Path) + } + sizes[i] = info.Size() + if sizes[i] > chunkSize { + chunkSize = sizes[i] + } + } + if chunkSize <= 0 { + return false, fmt.Errorf("invalid chunk size inferred") + } + for i := 0; i < len(sizes)-1; i++ { + if sizes[i] != chunkSize { + return false, fmt.Errorf("chunk size mismatch for index %d: got %d, expected %d", i+1, sizes[i], chunkSize) + } + } + last := sizes[len(sizes)-1] + if last <= 0 || last > chunkSize { + return false, fmt.Errorf("last chunk size invalid: %d (chunkSize=%d)", last, chunkSize) + } + return last == chunkSize, nil + } + + if meta.ChunkCount != len(chunks) { + return false, fmt.Errorf("chunk count mismatch: expected %d, found %d", meta.ChunkCount, len(chunks)) + } + + for i, c := range chunks { + info, err := os.Stat(c.Path) + if err != nil { + return false, fmt.Errorf("stat chunk %s: %w", c.Path, err) + } + if !info.Mode().IsRegular() { + return false, fmt.Errorf("chunk is not a regular file: %s", c.Path) + } + expected := meta.ChunkSizeBytes + if i == meta.ChunkCount-1 { + expected = meta.SizeBytes - meta.ChunkSizeBytes*int64(meta.ChunkCount-1) + } + if expected <= 0 { + return false, fmt.Errorf("invalid expected chunk size for index %d", i+1) + } + if info.Size() != expected { + return false, fmt.Errorf("chunk size mismatch for index %d: got %d, expected %d", i+1, info.Size(), expected) + } + } + + return false, nil +} + +func applyChunkedMetadata(logger *logging.Logger, destPath string, meta *chunkedFileMetadata) { + if meta == nil { + return + } + + if meta.UID >= 0 || meta.GID >= 0 { + uid := meta.UID + gid := meta.GID + if uid < 0 { + uid = -1 + } + if gid < 0 { + gid = -1 + } + if err := os.Chown(destPath, uid, gid); err != nil { + logger.Debug("Failed to chown reassembled file %s: %v", destPath, err) + } + } + + if err := os.Chmod(destPath, os.FileMode(meta.Mode)); err != nil { + logger.Debug("Failed to chmod reassembled file %s: %v", destPath, err) + } + + mt := time.Unix(0, meta.ModTimeUnixNano) + if err := os.Chtimes(destPath, mt, mt); err != nil { + logger.Debug("Failed to set timestamps on reassembled file %s: %v", destPath, err) } - return true, os.WriteFile(path, minified, defaultChunkFilePerm) } diff --git a/internal/backup/optimizations_helpers_test.go b/internal/backup/optimizations_helpers_test.go index 7c6fffa..3835e9c 100644 --- a/internal/backup/optimizations_helpers_test.go +++ b/internal/backup/optimizations_helpers_test.go @@ -2,11 +2,15 @@ package backup import ( "bytes" + "context" "encoding/json" "os" "path/filepath" "strings" "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" ) func TestSplitFileAndChunks(t *testing.T) { @@ -18,9 +22,16 @@ func TestSplitFileAndChunks(t *testing.T) { } destBase := filepath.Join(tmp, "chunks", "data.bin") - if err := splitFile(source, destBase, 16); err != nil { + res, err := splitFile(source, destBase, 16) + if err != nil { t.Fatalf("splitFile: %v", err) } + if res.ChunkCount != 3 { + t.Fatalf("chunk count %d, want 3", res.ChunkCount) + } + if res.SizeBytes != int64(len(content)) { + t.Fatalf("split size %d, want %d", res.SizeBytes, len(content)) + } chunks := []string{ 
destBase + ".001.chunk", @@ -119,3 +130,231 @@ func TestMinifyJSONKeepsData(t *testing.T) { t.Fatalf("unexpected decoded content: %+v", decoded) } } + +// TestMinifyJSONPreservesLargeIntegers verifies that json.Compact preserves +// numeric values that exceed float64 precision (integers > 2^53). +func TestMinifyJSONPreservesLargeIntegers(t *testing.T) { + tmp := t.TempDir() + path := filepath.Join(tmp, "data.json") + // 9007199254740993 is 2^53 + 1, which loses precision under float64. + input := `{"id": 9007199254740993, "name": "test"}` + if err := os.WriteFile(path, []byte(input), 0o640); err != nil { + t.Fatalf("write: %v", err) + } + if _, err := minifyJSON(path); err != nil { + t.Fatalf("minifyJSON: %v", err) + } + got, _ := os.ReadFile(path) + if !bytes.Contains(got, []byte("9007199254740993")) { + t.Fatalf("large integer lost precision: got %q", got) + } +} + +// TestMinifyJSONPreservesKeyOrder verifies that json.Compact does not +// reorder object keys (unlike json.Marshal on map[string]any). +func TestMinifyJSONPreservesKeyOrder(t *testing.T) { + tmp := t.TempDir() + path := filepath.Join(tmp, "data.json") + // Keys deliberately in reverse alphabetical order. + input := "{\n \"z\": 1,\n \"a\": 2\n}\n" + if err := os.WriteFile(path, []byte(input), 0o640); err != nil { + t.Fatalf("write: %v", err) + } + if _, err := minifyJSON(path); err != nil { + t.Fatalf("minifyJSON: %v", err) + } + got, _ := os.ReadFile(path) + expected := `{"z":1,"a":2}` + if string(got) != expected { + t.Fatalf("key order changed: expected %q, got %q", expected, string(got)) + } +} + +// TestMinifyJSONNoopOnAlreadyCompact verifies no disk write when file is +// already compact. +func TestMinifyJSONNoopOnAlreadyCompact(t *testing.T) { + tmp := t.TempDir() + path := filepath.Join(tmp, "data.json") + compact := `{"a":1,"b":2}` + if err := os.WriteFile(path, []byte(compact), 0o640); err != nil { + t.Fatalf("write: %v", err) + } + info1, _ := os.Stat(path) + if _, err := minifyJSON(path); err != nil { + t.Fatalf("minifyJSON: %v", err) + } + info2, _ := os.Stat(path) + if !info1.ModTime().Equal(info2.ModTime()) { + t.Fatalf("file was rewritten even though already compact") + } +} + +// TestReassembleChunkedFilesRoundTrip verifies that chunk + reassemble is a +// lossless round-trip: the reassembled file is byte-identical to the original. +func TestReassembleChunkedFilesRoundTrip(t *testing.T) { + root := t.TempDir() + + // Create a file that will be chunked (96 bytes, threshold 64, chunk size 16). + original := bytes.Repeat([]byte("ABCDEFGHIJKLMNOP"), 6) // 96 bytes + bigFile := filepath.Join(root, "subdir", "large.bin") + if err := os.MkdirAll(filepath.Dir(bigFile), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(bigFile, original, 0o640); err != nil { + t.Fatal(err) + } + + logger := logging.New(types.LogLevelError, false) + cfg := OptimizationConfig{ + EnableChunking: true, + ChunkSizeBytes: 16, + ChunkThresholdBytes: 64, + } + + // Apply optimizations (only chunking enabled). + if err := ApplyOptimizations(context.Background(), logger, root, cfg); err != nil { + t.Fatalf("ApplyOptimizations: %v", err) + } + + // Verify the original is gone and the marker exists. + if _, err := os.Stat(bigFile); !os.IsNotExist(err) { + t.Fatalf("expected original removed, stat err=%v", err) + } + if _, err := os.Stat(bigFile + ".chunked"); err != nil { + t.Fatalf("chunk marker missing: %v", err) + } + // Regression: if file size is an exact multiple of chunk size, we must not + // create an extra empty chunk. 
+ if _, err := os.Stat(filepath.Join(root, "chunked_files", "subdir", "large.bin.006.chunk")); err != nil { + t.Fatalf("expected last chunk to exist: %v", err) + } + if _, err := os.Stat(filepath.Join(root, "chunked_files", "subdir", "large.bin.007.chunk")); !os.IsNotExist(err) { + t.Fatalf("expected no extra empty chunk, stat err=%v", err) + } + + // Reassemble. + if err := ReassembleChunkedFiles(logger, root); err != nil { + t.Fatalf("ReassembleChunkedFiles: %v", err) + } + + // Verify byte-identical round-trip. + reassembled, err := os.ReadFile(bigFile) + if err != nil { + t.Fatalf("read reassembled: %v", err) + } + if !bytes.Equal(reassembled, original) { + t.Fatalf("reassembled content differs: got %d bytes, want %d bytes", len(reassembled), len(original)) + } + + // Verify cleanup: marker removed, chunked_files dir removed. + if _, err := os.Stat(bigFile + ".chunked"); !os.IsNotExist(err) { + t.Fatalf("chunk marker should be removed, stat err=%v", err) + } + if _, err := os.Stat(filepath.Join(root, "chunked_files")); !os.IsNotExist(err) { + t.Fatalf("chunked_files dir should be removed, stat err=%v", err) + } +} + +// TestReassembleNoopWithoutChunks verifies ReassembleChunkedFiles is a no-op +// when the directory contains no chunked files. +func TestReassembleNoopWithoutChunks(t *testing.T) { + root := t.TempDir() + filePath := filepath.Join(root, "normal.txt") + if err := os.WriteFile(filePath, []byte("hello"), 0o640); err != nil { + t.Fatal(err) + } + logger := logging.New(types.LogLevelError, false) + if err := ReassembleChunkedFiles(logger, root); err != nil { + t.Fatalf("ReassembleChunkedFiles on clean dir: %v", err) + } + got, _ := os.ReadFile(filePath) + if string(got) != "hello" { + t.Fatalf("file modified unexpectedly: %q", got) + } +} + +// TestNormalizeConfigFileSafeOperations verifies each of the four safe +// operations performed by normalizeConfigFile individually and combined. 
+func TestNormalizeConfigFileSafeOperations(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "BOM removal", + input: "\xef\xbb\xbf[section]\n\tkey = value\n", + expected: "[section]\n\tkey = value\n", + }, + { + name: "trailing whitespace per line", + input: "datastore: Data1\n\tpath /mnt/data \n\tgc-schedule 05:00\t\n", + expected: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", + }, + { + name: "trailing newlines consolidated", + input: "[section]\n\tkey = value\n\n\n\n", + expected: "[section]\n\tkey = value\n", + }, + { + name: "CRLF normalized to LF", + input: "datastore: X\r\n\tpath /tmp\r\n", + expected: "datastore: X\n\tpath /tmp\n", + }, + { + name: "stray CR normalized to LF", + input: "line1\rline2\n", + expected: "line1\nline2\n", + }, + { + name: "all operations combined", + input: "\xef\xbb\xbfdatastore: D1\r\n\tpath /mnt/d1 \r\n\tgc 05:00\t\r\n\n\n", + expected: "datastore: D1\n\tpath /mnt/d1\n\tgc 05:00\n", + }, + { + name: "clean file unchanged", + input: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", + expected: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", + }, + { + name: "preserves leading indentation", + input: "\t\tdeep indent\n\t\t\tdeeper\n", + expected: "\t\tdeep indent\n\t\t\tdeeper\n", + }, + { + name: "preserves blank lines between sections", + input: "datastore: A\n\tpath /a\n\ndatastore: B\n\tpath /b\n", + expected: "datastore: A\n\tpath /a\n\ndatastore: B\n\tpath /b\n", + }, + { + name: "preserves comments", + input: "# main config\n; alt comment\nkey = value\n", + expected: "# main config\n; alt comment\nkey = value\n", + }, + { + name: "empty file stays empty", + input: "", + expected: "", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tmp := t.TempDir() + path := filepath.Join(tmp, "test.cfg") + if err := os.WriteFile(path, []byte(tc.input), 0o640); err != nil { + t.Fatalf("write: %v", err) + } + if _, err := normalizeConfigFile(path); err != nil { + t.Fatalf("normalizeConfigFile: %v", err) + } + got, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read: %v", err) + } + if string(got) != tc.expected { + t.Fatalf("mismatch\ninput: %q\nexpected: %q\ngot: %q", tc.input, tc.expected, string(got)) + } + }) + } +} diff --git a/internal/orchestrator/chunking_paths.go b/internal/orchestrator/chunking_paths.go new file mode 100644 index 0000000..ef54811 --- /dev/null +++ b/internal/orchestrator/chunking_paths.go @@ -0,0 +1,28 @@ +package orchestrator + +import "strings" + +func originalPathFromChunk(relPath string) (string, bool) { + if !strings.HasSuffix(relPath, ".chunk") { + return "", false + } + withoutSuffix := strings.TrimSuffix(relPath, ".chunk") + dot := strings.LastIndexByte(withoutSuffix, '.') + if dot < 0 { + return "", false + } + idx := withoutSuffix[dot+1:] + if idx == "" { + return "", false + } + for i := 0; i < len(idx); i++ { + if idx[i] < '0' || idx[i] > '9' { + return "", false + } + } + original := withoutSuffix[:dot] + if original == "" { + return "", false + } + return original, true +} diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index 59be13c..743520f 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -3543,7 +3543,8 @@ echo $count > "%s" # First call (archive) succeeds, second call (metadata) fails if [ "$count" -eq 1 ]; then # Create the target file for archive - target="${@: -1}" + # rclone copyto [flags...] 
+ target="$3" echo "archive content" > "$target" exit 0 else diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 2f14d20..cb72463 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -18,6 +18,7 @@ import ( "syscall" "time" + "github.com/tis24dev/proxsave/internal/backup" "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/input" "github.com/tis24dev/proxsave/internal/logging" @@ -1296,7 +1297,7 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log if selectiveMode { shouldExtract := false for _, cat := range categories { - if PathMatchesCategory(header.Name, cat) { + if archiveEntryMatchesCategory(header.Name, cat) { shouldExtract = true break } @@ -1326,6 +1327,12 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log } } + // Reassemble any files that were split into chunks during backup optimization. + // This is a no-op when the archive contains no chunked files. + if err := backup.ReassembleChunkedFiles(logger, destRoot); err != nil { + logger.Warning("Chunk reassembly failed: %v", err) + } + // Write detailed log if logFile != nil { fmt.Fprintf(logFile, "=== FILES RESTORED ===\n") @@ -1375,6 +1382,35 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log return nil } +func archiveEntryMatchesCategory(entryName string, category Category) bool { + if PathMatchesCategory(entryName, category) { + return true + } + + clean := strings.TrimPrefix(strings.TrimSpace(entryName), "./") + + // Marker files created by smart chunking: .chunked + if strings.HasSuffix(clean, ".chunked") { + original := strings.TrimSuffix(clean, ".chunked") + if original != clean && PathMatchesCategory(original, category) { + return true + } + } + + // Chunk files stored under chunked_files/..chunk + if strings.HasPrefix(clean, "chunked_files/") { + trimmed := strings.TrimPrefix(clean, "chunked_files/") + if PathMatchesCategory(trimmed, category) { + return true + } + if original, ok := originalPathFromChunk(trimmed); ok && PathMatchesCategory(original, category) { + return true + } + } + + return false +} + func isRealRestoreFS(fs FS) bool { switch fs.(type) { case osFS, *osFS: diff --git a/internal/orchestrator/restore_chunking_selective_test.go b/internal/orchestrator/restore_chunking_selective_test.go new file mode 100644 index 0000000..1521190 --- /dev/null +++ b/internal/orchestrator/restore_chunking_selective_test.go @@ -0,0 +1,112 @@ +package orchestrator + +import ( + "archive/tar" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestExtractArchiveNative_SelectiveRestoreReassemblesChunkedFiles(t *testing.T) { + logger := logging.New(types.LogLevelError, false) + + origFS := restoreFS + restoreFS = osFS{} + t.Cleanup(func() { restoreFS = origFS }) + + tmp := t.TempDir() + archivePath := filepath.Join(tmp, "test.tar") + + data := []byte("hello world") + sum := sha256.Sum256(data) + meta := map[string]any{ + "version": 1, + "size_bytes": len(data), + "chunk_size_bytes": 6, + "chunk_count": 2, + "sha256": hex.EncodeToString(sum[:]), + "mode": 0o640, + "uid": -1, + "gid": -1, + "mod_time_unix_nano": time.Now().UnixNano(), + } + metaBytes, _ := json.Marshal(meta) + + f, err := os.Create(archivePath) + if err != nil { + t.Fatalf("create tar: %v", err) + } 
+ tw := tar.NewWriter(f) + addFile := func(name string, payload []byte) { + h := &tar.Header{ + Name: name, + Typeflag: tar.TypeReg, + Mode: 0o640, + Size: int64(len(payload)), + } + if err := tw.WriteHeader(h); err != nil { + t.Fatalf("write header %s: %v", name, err) + } + if len(payload) > 0 { + if _, err := tw.Write(payload); err != nil { + t.Fatalf("write data %s: %v", name, err) + } + } + } + + addFile("var/lib/pve-cluster/config.db-wal.chunked", metaBytes) + addFile("chunked_files/var/lib/pve-cluster/config.db-wal.001.chunk", data[:6]) + addFile("chunked_files/var/lib/pve-cluster/config.db-wal.002.chunk", data[6:]) + addFile("etc/hosts", []byte("127.0.0.1 localhost\n")) + + if err := tw.Close(); err != nil { + t.Fatalf("close tar writer: %v", err) + } + if err := f.Close(); err != nil { + t.Fatalf("close tar file: %v", err) + } + + destRoot := filepath.Join(tmp, "out") + if err := os.MkdirAll(destRoot, 0o755); err != nil { + t.Fatalf("mkdir destRoot: %v", err) + } + + cats := []Category{{ + ID: "pve_cluster_dbwal", + Name: "PVE Cluster DB WAL (test)", + Paths: []string{ + "./var/lib/pve-cluster/config.db-wal", + }, + }} + + if err := extractArchiveNative(context.Background(), archivePath, destRoot, logger, cats, RestoreModeCustom, nil, "", nil); err != nil { + t.Fatalf("extractArchiveNative: %v", err) + } + + originalPath := filepath.Join(destRoot, "var", "lib", "pve-cluster", "config.db-wal") + got, err := os.ReadFile(originalPath) + if err != nil { + t.Fatalf("read reassembled file: %v", err) + } + if string(got) != string(data) { + t.Fatalf("reassembled content mismatch: got %q", string(got)) + } + + if _, err := os.Stat(originalPath + ".chunked"); !os.IsNotExist(err) { + t.Fatalf("marker file should be removed, stat err=%v", err) + } + if _, err := os.Stat(filepath.Join(destRoot, "chunked_files")); !os.IsNotExist(err) { + t.Fatalf("chunked_files dir should be removed, stat err=%v", err) + } + if _, err := os.Stat(filepath.Join(destRoot, "etc", "hosts")); !os.IsNotExist(err) { + t.Fatalf("unrelated file should not be extracted in selective mode, stat err=%v", err) + } +} diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index 4b05ea2..503a196 100644 --- a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -107,38 +107,67 @@ func collectArchivePaths(tarReader *tar.Reader) []string { // pathMatchesPattern checks if an archive path matches a category pattern func pathMatchesPattern(archivePath, pattern string) bool { - // Normalize paths - normArchive := archivePath - if !strings.HasPrefix(normArchive, "./") { - normArchive = "./" + normArchive - } + match := func(archivePath, pattern string) bool { + // Normalize paths + normArchive := archivePath + if !strings.HasPrefix(normArchive, "./") { + normArchive = "./" + normArchive + } - normPattern := pattern - if !strings.HasPrefix(normPattern, "./") { - normPattern = "./" + normPattern - } + normPattern := pattern + if !strings.HasPrefix(normPattern, "./") { + normPattern = "./" + normPattern + } + + if strings.ContainsAny(normPattern, "*?[") && !strings.HasSuffix(normPattern, "/") { + if ok, err := path.Match(normPattern, normArchive); err == nil && ok { + return true + } + } + + // Exact match + if normArchive == normPattern { + return true + } + + // Directory prefix match + if strings.HasSuffix(normPattern, "/") { + if strings.HasPrefix(normArchive, normPattern) { + return true + } + } - if strings.ContainsAny(normPattern, "*?[") && !strings.HasSuffix(normPattern, "/") 
{ - if ok, err := path.Match(normPattern, normArchive); err == nil && ok { + // Parent directory match + if strings.HasPrefix(normArchive, strings.TrimSuffix(normPattern, "/")+"/") { return true } + + return false } - // Exact match - if normArchive == normPattern { - return true + // Smart chunking stores large files as: + // - .chunked (marker file) + // - chunked_files/..chunk (chunk store) + // For category analysis, map these artifacts back to the original path. + candidates := []string{archivePath} + clean := strings.TrimPrefix(strings.TrimSpace(archivePath), "./") + + if strings.HasSuffix(clean, ".chunked") { + candidates = append(candidates, strings.TrimSuffix(clean, ".chunked")) } - // Directory prefix match - if strings.HasSuffix(normPattern, "/") { - if strings.HasPrefix(normArchive, normPattern) { - return true + if strings.HasPrefix(clean, "chunked_files/") { + trimmed := strings.TrimPrefix(clean, "chunked_files/") + candidates = append(candidates, trimmed) + if original, ok := originalPathFromChunk(trimmed); ok { + candidates = append(candidates, original) } } - // Parent directory match - if strings.HasPrefix(normArchive, strings.TrimSuffix(normPattern, "/")+"/") { - return true + for _, candidate := range candidates { + if match(candidate, pattern) { + return true + } } return false diff --git a/internal/orchestrator/selective_pure_test.go b/internal/orchestrator/selective_pure_test.go index 7f0d766..e79d4fe 100644 --- a/internal/orchestrator/selective_pure_test.go +++ b/internal/orchestrator/selective_pure_test.go @@ -11,7 +11,8 @@ func TestAnalyzeArchivePaths(t *testing.T) { paths := []string{ "./etc/pve/storage.cfg", - "./etc/network/interfaces", + "./etc/network/interfaces.chunked", + "./chunked_files/etc/network/interfaces.001.chunk", "./random/file", } @@ -40,6 +41,8 @@ func TestPathMatchesPatternVariants(t *testing.T) { }{ {"etc/pve/storage.cfg", "./etc/pve/", true}, {"./etc/network/interfaces", "./etc/network/interfaces", true}, + {"./etc/network/interfaces.chunked", "./etc/network/interfaces", true}, + {"./chunked_files/etc/network/interfaces.001.chunk", "./etc/network/interfaces", true}, {"./etc/network/interfaces.d/foo", "./etc/network/interfaces", false}, {"./var/log/syslog", "./etc/network/", false}, } From 29cd6a1e0afefd8821821b7be0647ba651ff5441 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Sun, 15 Feb 2026 23:34:21 +0100 Subject: [PATCH 11/24] Prevent tests from creating '--progress' artifact Add a TestMain in internal/orchestrator to detect and fail if tests leave an artifact named "--progress" in the package directory, and to remove any stale artifact before running tests. Also harden the rclone-copy stub in internal/orchestrator/decrypt_test.go to treat empty or flag-like targets (starting with "--") as invalid destinations (prints error and exits 2), preventing tests from accidentally writing to a flag string. 
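A minimal sketch of the failure mode this guard targets, assuming an invocation shaped like "rclone copyto <src> <dst> --progress" (the flag and paths below are illustrative, not taken from the test fixtures): resolving the destination from the last positional argument picks up the trailing flag, while reading the third positional argument does not.

    package main

    import "fmt"

    func main() {
        // Hypothetical argument vector passed to the stubbed rclone binary.
        args := []string{"copyto", "/tmp/src/archive.enc", "/tmp/dst/archive.enc", "--progress"}

        naiveTarget := args[len(args)-1] // what the old stub's "${@: -1}" resolved to
        positionalTarget := args[2]      // what the hardened stub reads via "$3"

        fmt.Println("naive target:", naiveTarget)           // --progress
        fmt.Println("positional target:", positionalTarget) // /tmp/dst/archive.enc
    }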
--- internal/orchestrator/artifact_guard_test.go | 33 ++++++++++++++++++++ internal/orchestrator/decrypt_test.go | 4 +++ 2 files changed, 37 insertions(+) create mode 100644 internal/orchestrator/artifact_guard_test.go diff --git a/internal/orchestrator/artifact_guard_test.go b/internal/orchestrator/artifact_guard_test.go new file mode 100644 index 0000000..cd1b36c --- /dev/null +++ b/internal/orchestrator/artifact_guard_test.go @@ -0,0 +1,33 @@ +package orchestrator + +import ( + "fmt" + "os" + "path/filepath" + "testing" +) + +// TestMain guards against tests accidentally creating artifacts in the package +// directory (e.g. due to naive fake binaries interpreting flags as paths). +func TestMain(m *testing.M) { + wd, err := os.Getwd() + if err != nil { + fmt.Fprintln(os.Stderr, "TestMain: failed to get wd:", err) + os.Exit(1) + } + + artifact := filepath.Join(wd, "--progress") + + // Clean up a stale artifact from a previous run (best-effort). + _ = os.Remove(artifact) + + code := m.Run() + + if _, err := os.Stat(artifact); err == nil { + fmt.Fprintln(os.Stderr, "ERROR: test left artifact:", artifact) + _ = os.Remove(artifact) + code = 1 + } + + os.Exit(code) +} diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index 743520f..16a79bf 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -3545,6 +3545,10 @@ if [ "$count" -eq 1 ]; then # Create the target file for archive # rclone copyto [flags...] target="$3" + if [[ -z "$target" || "$target" == --* ]]; then + echo "invalid rclone dst: $target" >&2 + exit 2 + fi echo "archive content" > "$target" exit 0 else From 753e100b8bd6963c576da65e80ac0276cbfe2d8e Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 01:03:08 +0100 Subject: [PATCH 12/24] Enhance build/versioning, PBS API and warnings Makefile: improve VERSION derivation to produce stable dev-style version strings (include tag/dev count/sha and .dirty handling) for both build and build-release targets. internal/orchestrator/pbs_staged_apply.go: capture PBS API availability error, surface a descriptive error when merge mode skips API-applied PBS categories, and adjust logging for the merge-skip case. The function now returns an error if API-applied categories were skipped. internal/orchestrator/restore_workflow_ui.go: consider logger.HasWarnings() when deciding final restore summary so logged warnings are reflected in the final message. internal/orchestrator/restore_workflow_warnings_test.go: add a test (TestRunRestoreWorkflow_FinalSummaryReflectsLoggedWarnings) that asserts the final summary reports logged warnings; add required imports and test setup. --- Makefile | 40 +++++++- internal/orchestrator/pbs_staged_apply.go | 31 ++++++- internal/orchestrator/restore_workflow_ui.go | 2 +- .../restore_workflow_warnings_test.go | 92 +++++++++++++++++++ 4 files changed, 158 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 3b936c9..e3041d4 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,25 @@ COVERAGE_THRESHOLD ?= 50.0 # Build del progetto build: @echo "Building proxsave..." 
- @VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev); \ + @VERSION=$$( \ + if git describe --tags --exact-match >/dev/null 2>&1 && [ -z "$$(git status --porcelain 2>/dev/null)" ]; then \ + git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev; \ + else \ + desc=$$(git describe --tags --long --dirty --always 2>/dev/null || echo dev); \ + dirty=""; \ + case "$$desc" in *-dirty) dirty=".dirty"; desc=$${desc%-dirty};; esac; \ + sha_part=$${desc##*-}; \ + sha=$${sha_part#g}; \ + rest=$${desc%-*}; \ + n=$${rest##*-}; \ + tag=$${rest%-*}; \ + if [ "$$tag" = "$$desc" ] || [ -z "$$n" ] || [ -z "$$sha" ] || [ "$$sha_part" = "$$desc" ]; then \ + echo "0.0.0-dev.0+g$${desc}$$dirty"; \ + else \ + echo "$$tag-dev.$$n+g$$sha$$dirty"; \ + fi; \ + fi \ + ); \ COMMIT=$$(git rev-parse --short HEAD 2>/dev/null || echo dev); \ BUILD_TIME=$$(date -u +"%Y-%m-%dT%H:%M:%SZ"); \ go build -ldflags="-X 'main.buildTime=$$BUILD_TIME' -X 'github.com/tis24dev/proxsave/internal/version.Version=$$VERSION' -X 'github.com/tis24dev/proxsave/internal/version.Commit=$$COMMIT' -X 'github.com/tis24dev/proxsave/internal/version.Date=$$BUILD_TIME'" -o build/proxsave ./cmd/proxsave @@ -13,7 +31,25 @@ build: # Build ottimizzato per release build-release: @echo "Building release..." - @VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev); \ + @VERSION=$$( \ + if git describe --tags --exact-match >/dev/null 2>&1 && [ -z "$$(git status --porcelain 2>/dev/null)" ]; then \ + git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev; \ + else \ + desc=$$(git describe --tags --long --dirty --always 2>/dev/null || echo dev); \ + dirty=""; \ + case "$$desc" in *-dirty) dirty=".dirty"; desc=$${desc%-dirty};; esac; \ + sha_part=$${desc##*-}; \ + sha=$${sha_part#g}; \ + rest=$${desc%-*}; \ + n=$${rest##*-}; \ + tag=$${rest%-*}; \ + if [ "$$tag" = "$$desc" ] || [ -z "$$n" ] || [ -z "$$sha" ] || [ "$$sha_part" = "$$desc" ]; then \ + echo "0.0.0-dev.0+g$${desc}$$dirty"; \ + else \ + echo "$$tag-dev.$$n+g$$sha$$dirty"; \ + fi; \ + fi \ + ); \ COMMIT=$$(git rev-parse --short HEAD 2>/dev/null || echo dev); \ BUILD_TIME=$$(date -u +"%Y-%m-%dT%H:%M:%SZ"); \ go build -ldflags="-s -w -X 'main.buildTime=$$BUILD_TIME' -X 'github.com/tis24dev/proxsave/internal/version.Version=$$VERSION' -X 'github.com/tis24dev/proxsave/internal/version.Commit=$$COMMIT' -X 'github.com/tis24dev/proxsave/internal/version.Date=$$BUILD_TIME'" -o build/proxsave ./cmd/proxsave diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index be72037..ee80a5d 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -46,18 +46,41 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, needsAPI := plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs") apiAvailable := false + var apiUnavailableErr error if needsAPI { - if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if apiErr := ensurePBSServicesForAPI(ctx, logger); apiErr != nil { + apiUnavailableErr = apiErr if allowFileFallback { - logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", err) + logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", apiErr) } else { - logger.Warning("PBS API apply unavailable; skipping API-applied PBS categories (merge mode): %v", err) + 
logging.DebugStep(logger, "pbs staged apply", "PBS API apply unavailable; merge mode will skip API-applied PBS categories: %v", apiErr) } } else { apiAvailable = true } } + var mergeAPISkipErr error + if needsAPI && !apiAvailable && !allowFileFallback && apiUnavailableErr != nil { + var skipped []string + if plan.HasCategoryID("pbs_host") { + skipped = append(skipped, "pbs_host (node.cfg/traffic-control.cfg)") + } + if plan.HasCategoryID("datastore_pbs") { + skipped = append(skipped, "datastore_pbs") + } + if plan.HasCategoryID("pbs_remotes") { + skipped = append(skipped, "pbs_remotes") + } + if plan.HasCategoryID("pbs_jobs") { + skipped = append(skipped, "pbs_jobs") + } + if len(skipped) == 0 { + skipped = append(skipped, "PBS API categories") + } + mergeAPISkipErr = fmt.Errorf("PBS API apply unavailable in %s; skipped %s: %w", behavior.DisplayName(), strings.Join(skipped, ", "), apiUnavailableErr) + } + if plan.HasCategoryID("pbs_host") { // Always restore file-only configs (no stable API coverage yet). // ACME should be applied before node config (node.cfg references ACME accounts/plugins). @@ -185,7 +208,7 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, } } - return nil + return mergeAPISkipErr } func applyPBSRemoteCfgFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index f90c1ad..1872862 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -748,7 +748,7 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } logger.Info("") - if restoreHadWarnings { + if restoreHadWarnings || (logger != nil && logger.HasWarnings()) { logger.Warning("Restore completed with warnings.") } else { logger.Info("Restore completed successfully.") diff --git a/internal/orchestrator/restore_workflow_warnings_test.go b/internal/orchestrator/restore_workflow_warnings_test.go index 3da4b6b..6bb31eb 100644 --- a/internal/orchestrator/restore_workflow_warnings_test.go +++ b/internal/orchestrator/restore_workflow_warnings_test.go @@ -1,11 +1,13 @@ package orchestrator import ( + "bytes" "context" "errors" "io/fs" "os" "path/filepath" + "strings" "testing" "time" @@ -130,3 +132,93 @@ func TestRunRestoreWorkflow_FstabMergeFails_ContinuesWithWarnings(t *testing.T) t.Fatalf("expected warnings") } } + +func TestRunRestoreWorkflow_FinalSummaryReflectsLoggedWarnings(t *testing.T) { + origRestoreFS := restoreFS + origRestoreCmd := restoreCmd + origRestoreSystem := restoreSystem + origRestoreTime := restoreTime + origCompatFS := compatFS + origPrepare := prepareRestoreBundleFunc + origSafetyFS := safetyFS + origSafetyNow := safetyNow + t.Cleanup(func() { + restoreFS = origRestoreFS + restoreCmd = origRestoreCmd + restoreSystem = origRestoreSystem + restoreTime = origRestoreTime + compatFS = origCompatFS + prepareRestoreBundleFunc = origPrepare + safetyFS = origSafetyFS + safetyNow = origSafetyNow + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + compatFS = fakeFS + safetyFS = fakeFS + + fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} + restoreTime = fakeNow + safetyNow = fakeNow.Now + + restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} + restoreCmd = runOnlyRunner{} + + // Make compatibility detection treat this as PVE. 
+ if err := fakeFS.AddFile("/usr/bin/qm", []byte("x")); err != nil { + t.Fatalf("fakeFS.AddFile: %v", err) + } + + // Minimal backup tar with one file from the "services" category. + tmpTar := filepath.Join(t.TempDir(), "bundle.tar") + if err := writeTarFile(tmpTar, map[string]string{ + "etc/timezone": "UTC\n", + }); err != nil { + t.Fatalf("writeTarFile: %v", err) + } + tarBytes, err := os.ReadFile(tmpTar) + if err != nil { + t.Fatalf("os.ReadFile: %v", err) + } + if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile(/bundle.tar): %v", err) + } + + prepareRestoreBundleFunc = func(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { + cand := &decryptCandidate{ + DisplayBase: "test", + Manifest: &backup.Manifest{ + CreatedAt: fakeNow.Now(), + ClusterMode: "standalone", + ProxmoxType: "pbs", // force incompatibility warning on a PVE system + ScriptVersion: "vtest", + }, + } + prepared := &preparedBundle{ + ArchivePath: "/bundle.tar", + Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, + cleanup: func() {}, + } + return cand, prepared, nil + } + + var out bytes.Buffer + logger := logging.New(types.LogLevelInfo, false) + logger.SetOutput(&out) + cfg := &config.Config{BaseDir: "/base"} + ui := &fakeRestoreWorkflowUI{ + mode: RestoreModeCustom, + categories: []Category{mustCategoryByID(t, "services")}, + confirmRestore: true, + confirmCompatible: true, + } + + if err := runRestoreWorkflowWithUI(context.Background(), cfg, logger, "vtest", ui); err != nil { + t.Fatalf("runRestoreWorkflowWithUI error: %v", err) + } + if !strings.Contains(out.String(), "Restore completed with warnings.") { + t.Fatalf("expected final summary to report warnings; got output:\n%s", out.String()) + } +} From 16c0485c350b6086a0e8803c52b7fee856f7427b Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 01:16:33 +0100 Subject: [PATCH 13/24] Fix category toggle/deselection logic and test Ensure deselecting a category actually removes it from the selection map and update selection counting. ShowCategorySelectionMenuWithReader now uses len(selected) to determine how many categories are chosen and deletes map entries when a category is toggled off, instead of leaving false-valued keys. Added a test to exercise toggling a category off then on and to verify continue behavior when no categories are selected. Affected files: internal/orchestrator/selective.go, internal/orchestrator/additional_helpers_test.go. 
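As a minimal sketch of the toggle semantics described above (the helper below is illustrative, not the menu code itself): turning a category off deletes its map entry instead of storing false, so len(selected) counts only the categories that are currently on.

    package main

    import "fmt"

    func toggle(selected map[int]bool, index int) {
        if selected[index] {
            delete(selected, index) // deselect: drop the entry rather than keep a false value
        } else {
            selected[index] = true
        }
    }

    func main() {
        selected := make(map[int]bool)
        toggle(selected, 0)        // select category 1
        toggle(selected, 0)        // deselect it again
        fmt.Println(len(selected)) // prints 0, so "continue" correctly sees nothing selected
    }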
--- internal/orchestrator/additional_helpers_test.go | 16 ++++++++++++++++ internal/orchestrator/selective.go | 11 ++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/internal/orchestrator/additional_helpers_test.go b/internal/orchestrator/additional_helpers_test.go index 8b20d73..df1623b 100644 --- a/internal/orchestrator/additional_helpers_test.go +++ b/internal/orchestrator/additional_helpers_test.go @@ -1231,6 +1231,22 @@ func TestShowCategorySelectionMenu(t *testing.T) { t.Fatalf("expected 2 categories after toggle, got %d", len(cats)) } + // Toggle category 1 twice, attempt continue with none selected, then select 1 and continue + r, w, _ = os.Pipe() + _, _ = w.WriteString("1\n1\nc\n1\nc\n") + _ = w.Close() + os.Stdin = r + cats, err = ShowCategorySelectionMenu(context.Background(), logger, available, SystemTypePVE) + if err != nil { + t.Fatalf("ShowCategorySelectionMenu deselect-all error: %v", err) + } + if len(cats) != 1 { + t.Fatalf("expected 1 category after re-select, got %d", len(cats)) + } + if cats[0].ID != "pve_cluster" { + t.Fatalf("expected pve_cluster selected, got %q", cats[0].ID) + } + // Cancel r, w, _ = os.Pipe() _, _ = w.WriteString("0\n") diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index 503a196..a6a4a61 100644 --- a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -312,10 +312,7 @@ func ShowCategorySelectionMenuWithReader(ctx context.Context, reader *bufio.Read selected = make(map[int]bool) case "c": // Continue - check if at least one category is selected - selectedCount := 0 - for range selected { - selectedCount++ - } + selectedCount := len(selected) if selectedCount == 0 { fmt.Println() @@ -346,7 +343,11 @@ func ShowCategorySelectionMenuWithReader(ctx context.Context, reader *bufio.Read // Toggle selection index := num - 1 - selected[index] = !selected[index] + if selected[index] { + delete(selected, index) + } else { + selected[index] = true + } } } } From befdf81f4a3007c2a08fab4c01cc21e65adfa148 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 02:06:10 +0100 Subject: [PATCH 14/24] Create and manage restore staging dirs Add secure staging directory lifecycle for restores: createRestoreStageDir() creates /tmp/proxsave/restore-stage-* with 0700 perms and a pid+sequence suffix; stageDestRoot() now includes pid. Implement cleanupOldRestoreStageDirs() to remove aged staging dirs and wire it into orchestrator startup and UI paths. Add preserve behavior via PROXSAVE_PRESERVE_RESTORE_STAGING (and preserve on warnings/network staged installs), auto-remove staging dir after successful clean restores, and log actions. Update docs to document 0700 perms and auto-clean behavior, and add tests covering stage dir creation and old-dir cleanup. 
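A minimal sketch of the age check behind the staging cleanup (the helper name is illustrative): a restore-stage-* directory is removed only when its modification time is not newer than now minus the cleanup age, which matches the 48-hour-old vs 1-hour-old cases exercised by the new test.

    package main

    import (
        "fmt"
        "time"
    )

    // shouldRemove mirrors the cutoff comparison: entries whose mtime is after the
    // cutoff are kept; anything at or before the cutoff is eligible for removal.
    func shouldRemove(modTime, now time.Time, maxAge time.Duration) bool {
        cutoff := now.Add(-maxAge)
        return !modTime.After(cutoff)
    }

    func main() {
        now := time.Unix(1700000000, 0).UTC()
        fmt.Println(shouldRemove(now.Add(-48*time.Hour), now, 24*time.Hour)) // true: removed
        fmt.Println(shouldRemove(now.Add(-1*time.Hour), now, 24*time.Hour))  // false: kept
    }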
--- docs/RESTORE_GUIDE.md | 4 +- docs/RESTORE_TECHNICAL.md | 2 +- internal/orchestrator/orchestrator.go | 11 +++ internal/orchestrator/restore_workflow_ui.go | 43 ++++++++- internal/orchestrator/staging.go | 73 ++++++++++++++- internal/orchestrator/staging_test.go | 95 ++++++++++++++++++++ 6 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 internal/orchestrator/staging_test.go diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index bac43c2..5c0e370 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -86,7 +86,7 @@ Restore operations are organized into **20–22 categories** (PBS = 20, PVE = 22 Each category is handled in one of three ways: - **Normal**: extracted directly to `/` (system paths) after safety backup -- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) +- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` (permissions `0700`) and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`). On clean restores, the staging directory is removed automatically; set `PROXSAVE_PRESERVE_RESTORE_STAGING=1` to keep it. - **Export-only**: extracted to an export directory for manual review (never written to system paths) ### PVE-Specific Categories (11 categories) @@ -2699,7 +2699,7 @@ tar -xzf /path/to/decrypted.tar.gz ./specific/file/path A: Yes: - **Extraction**: ProxSave preserves UID/GID, mode bits and timestamps (mtime/atime) for extracted entries. -- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. +- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` (permissions `0700`) and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. On clean restores, the staging directory is removed automatically (override: `PROXSAVE_PRESERVE_RESTORE_STAGING=1`). - **ctime**: Cannot be set (kernel-managed). 
--- diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index 2bd5c02..98340b2 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -871,7 +871,7 @@ func extractSelectiveArchive( #### Phase 10: Staged Apply (PVE/PBS) -After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*`. +After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*` (permissions `0700`). On clean restores, ProxSave removes the staging directory automatically (override: `PROXSAVE_PRESERVE_RESTORE_STAGING=1`). **PBS staged apply**: - Selected interactively during restore on PBS hosts: **Merge (existing PBS)** vs **Clean 1:1 (fresh PBS install)**. diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index ccaa936..60510de 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1425,6 +1425,17 @@ func (o *Orchestrator) cleanupPreviousExecutionArtifacts() *TempDirRegistry { } } + // Phase 5: Cleanup old restore staging directories under /tmp/proxsave + stageRemoved, stageFailed := cleanupOldRestoreStageDirs(fs, o.logger, o.now(), tempDirCleanupAge) + if stageRemoved > 0 || stageFailed > 0 { + if !cleanupStarted { + o.logger.Debug("Starting cleanup of previous execution files...") + cleanupStarted = true + } + removedDirs += stageRemoved + failedFiles += stageFailed + } + // Final summary - only show if cleanup was actually performed if cleanupStarted { if removedFiles > 0 || removedDirs > 0 { diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index 1872862..d57f262 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -45,6 +45,10 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l done := logging.DebugStart(logger, "restore workflow (ui)", "version=%s", version) defer func() { done(err) }() + if removed, failed := cleanupOldRestoreStageDirs(restoreFS, logger, nowRestore(), tempDirCleanupAge); removed > 0 || failed > 0 { + logger.Debug("Restore staging cleanup (older than %s): removed=%d failed=%d", tempDirCleanupAge, removed, failed) + } + restoreHadWarnings := false defer func() { if err == nil { @@ -497,13 +501,29 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l stageLogPath := "" stageRoot := "" + stageWarningsStart := int64(0) + stageNetworkInstalled := false + preserveStage := preserveRestoreStagingFromEnv() || cfg.DryRun if len(plan.StagedCategories) > 0 { - stageRoot = stageDestRoot() + stageRoot, err = createRestoreStageDir() + if err != nil { + return fmt.Errorf("failed to create staging directory: %w", err) + } logger.Info("") logger.Info("Staging %d sensitive category(ies) to: %s", len(plan.StagedCategories), stageRoot) - if err := restoreFS.MkdirAll(stageRoot, 0o755); err != nil { - return fmt.Errorf("failed to create staging directory %s: %w", stageRoot, err) + if logger != nil { + stageWarningsStart = logger.WarningCount() } + defer func() { + if strings.TrimSpace(stageRoot) == "" || preserveStage { + return + } + if cleanupErr := restoreFS.RemoveAll(stageRoot); cleanupErr != nil { + logger.Warning("Failed to remove staging directory %s: %v", stageRoot, cleanupErr) + } else { + logger.Debug("Staging directory removed: %s", stageRoot) + } + }() if stageLog, err := extractSelectiveArchive(ctx, 
prepared.ArchivePath, stageRoot, plan.StagedCategories, RestoreModeCustom, logger); err != nil { if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { @@ -592,6 +612,7 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l restoreHadWarnings = true logger.Warning("Network staged install: %v", err) } else if installed { + stageNetworkInstalled = true stageRootForNetworkApply = "" logging.DebugStep(logger, "restore", "Network staged install completed: configuration written to /etc (no reload); live apply will use system paths") } @@ -747,6 +768,15 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } } + if strings.TrimSpace(stageRoot) != "" { + if plan != nil && plan.HasCategoryID("network") && !stageNetworkInstalled { + preserveStage = true + } + if logger != nil && logger.WarningCount() > stageWarningsStart { + preserveStage = true + } + } + logger.Info("") if restoreHadWarnings || (logger != nil && logger.HasWarnings()) { logger.Warning("Restore completed with warnings.") @@ -765,7 +795,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Info("Export detailed log: %s", exportLogPath) } if stageRoot != "" { - logger.Info("Staging directory: %s", stageRoot) + if preserveStage { + logger.Info("Staging directory (preserved): %s", stageRoot) + logger.Warning("Staging directory contains sensitive files. Remove it when no longer needed: rm -rf %s", stageRoot) + } else { + logger.Info("Staging directory (auto-cleanup): %s", stageRoot) + } } if stageLogPath != "" { logger.Info("Staging detailed log: %s", stageLogPath) diff --git a/internal/orchestrator/staging.go b/internal/orchestrator/staging.go index b3ebdaf..59a7344 100644 --- a/internal/orchestrator/staging.go +++ b/internal/orchestrator/staging.go @@ -2,9 +2,13 @@ package orchestrator import ( "fmt" + "os" "path/filepath" "strings" "sync/atomic" + "time" + + "github.com/tis24dev/proxsave/internal/logging" ) var restoreStageSequence uint64 @@ -50,5 +54,72 @@ func splitRestoreCategories(categories []Category) (normal []Category, staged [] func stageDestRoot() string { base := "/tmp/proxsave" seq := atomic.AddUint64(&restoreStageSequence, 1) - return filepath.Join(base, fmt.Sprintf("restore-stage-%s_%d", nowRestore().Format("20060102-150405"), seq)) + return filepath.Join(base, fmt.Sprintf("restore-stage-%s_pid%d_%d", nowRestore().Format("20060102-150405"), os.Getpid(), seq)) +} + +func createRestoreStageDir() (string, error) { + base := "/tmp/proxsave" + if err := restoreFS.MkdirAll(base, 0o755); err != nil { + return "", fmt.Errorf("ensure staging base directory %s: %w", base, err) + } + + dir := stageDestRoot() + if err := restoreFS.MkdirAll(dir, 0o700); err != nil { + return "", fmt.Errorf("create staging directory %s: %w", dir, err) + } + return dir, nil +} + +func preserveRestoreStagingFromEnv() bool { + v := strings.TrimSpace(os.Getenv("PROXSAVE_PRESERVE_RESTORE_STAGING")) + if v == "" { + return false + } + switch strings.ToLower(v) { + case "1", "true", "yes", "y", "on": + return true + default: + return false + } +} + +func cleanupOldRestoreStageDirs(fs FS, logger *logging.Logger, now time.Time, maxAge time.Duration) (removed int, failed int) { + base := "/tmp/proxsave" + entries, err := fs.ReadDir(base) + if err != nil { + return 0, 0 + } + + cutoff := now.Add(-maxAge) + for _, entry := range entries { + if entry == nil || !entry.IsDir() { + continue + } + name := strings.TrimSpace(entry.Name()) + if name == "" || 
!strings.HasPrefix(name, "restore-stage-") { + continue + } + fullPath := filepath.Join(base, name) + info, err := fs.Stat(fullPath) + if err != nil || info == nil || !info.IsDir() { + continue + } + if info.ModTime().After(cutoff) { + continue + } + + if err := fs.RemoveAll(fullPath); err != nil { + failed++ + if logger != nil { + logger.Debug("Failed to cleanup restore staging directory %s: %v", fullPath, err) + } + continue + } + removed++ + if logger != nil { + logger.Debug("Cleaned old restore staging directory: %s", fullPath) + } + } + + return removed, failed } diff --git a/internal/orchestrator/staging_test.go b/internal/orchestrator/staging_test.go new file mode 100644 index 0000000..b3e840f --- /dev/null +++ b/internal/orchestrator/staging_test.go @@ -0,0 +1,95 @@ +package orchestrator + +import ( + "os" + "strings" + "testing" + "time" +) + +func TestCreateRestoreStageDir_Creates0700Directory(t *testing.T) { + origFS := restoreFS + origTime := restoreTime + origSeq := restoreStageSequence + t.Cleanup(func() { + restoreFS = origFS + restoreTime = origTime + restoreStageSequence = origSeq + }) + + fake := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) + restoreFS = fake + restoreTime = &FakeTime{Current: time.Unix(1700000000, 0)} + restoreStageSequence = 0 + + stageRoot, err := createRestoreStageDir() + if err != nil { + t.Fatalf("createRestoreStageDir error: %v", err) + } + if !strings.HasPrefix(stageRoot, "/tmp/proxsave/restore-stage-") { + t.Fatalf("stageRoot=%q; want under /tmp/proxsave/restore-stage-*", stageRoot) + } + + info, err := fake.Stat(stageRoot) + if err != nil { + t.Fatalf("Stat(%q): %v", stageRoot, err) + } + if info == nil || !info.IsDir() { + t.Fatalf("Stat(%q): isDir=%v; want dir", stageRoot, info != nil && info.IsDir()) + } + if perm := info.Mode().Perm(); perm != 0o700 { + t.Fatalf("stageRoot perm=%#o; want %#o", perm, 0o700) + } +} + +func TestCleanupOldRestoreStageDirs_RemovesOnlyOldDirs(t *testing.T) { + fake := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) + + base := "/tmp/proxsave" + oldDir := base + "/restore-stage-old" + newDir := base + "/restore-stage-new" + + if err := fake.MkdirAll(base, 0o755); err != nil { + t.Fatalf("MkdirAll(%q): %v", base, err) + } + if err := fake.MkdirAll(oldDir, 0o700); err != nil { + t.Fatalf("MkdirAll(%q): %v", oldDir, err) + } + if err := fake.MkdirAll(newDir, 0o700); err != nil { + t.Fatalf("MkdirAll(%q): %v", newDir, err) + } + if err := fake.WriteFile(base+"/restore-stage-file", []byte("x"), 0o600); err != nil { + t.Fatalf("WriteFile restore-stage-file: %v", err) + } + + now := time.Unix(1700000000, 0).UTC() + oldTime := now.Add(-48 * time.Hour) + newTime := now.Add(-1 * time.Hour) + + if err := os.Chtimes(fake.onDisk(oldDir), oldTime, oldTime); err != nil { + t.Fatalf("Chtimes(oldDir): %v", err) + } + if err := os.Chtimes(fake.onDisk(newDir), newTime, newTime); err != nil { + t.Fatalf("Chtimes(newDir): %v", err) + } + + removed, failed := cleanupOldRestoreStageDirs(fake, nil, now, 24*time.Hour) + if failed != 0 { + t.Fatalf("failed=%d; want 0", failed) + } + if removed != 1 { + t.Fatalf("removed=%d; want 1", removed) + } + + if _, err := fake.Stat(oldDir); err == nil || !os.IsNotExist(err) { + t.Fatalf("oldDir still exists (err=%v); want removed", err) + } + if _, err := fake.Stat(newDir); err != nil { + t.Fatalf("newDir missing (err=%v); want kept", err) + } + if _, err := fake.Stat(base + "/restore-stage-file"); err != nil { + t.Fatalf("restore-stage-file missing (err=%v); want 
kept", err) + } +} From 96053eecf9dc453d5704767437263f0ca1d71cd3 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 02:16:31 +0100 Subject: [PATCH 15/24] Centralize and reorder PVE service management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a single pveClusterServices list and use it for starting/stopping PVE services. stopPVEClusterServices now iterates the list in reverse so services are stopped in the correct order (pvestatd → pveproxy → pvedaemon → pve-cluster), and startPVEClusterServices uses the forward order. Update tests to assert stop order rather than just presence, and update restore documentation/diagrams/guides to reflect the new service stop sequence and formatting fixes. --- docs/CLUSTER_RECOVERY.md | 4 +-- docs/RESTORE_DIAGRAMS.md | 29 ++++++++++---------- docs/RESTORE_GUIDE.md | 16 +++++------ internal/orchestrator/restore.go | 8 +++--- internal/orchestrator/restore_errors_test.go | 29 ++++++++++++-------- 5 files changed, 46 insertions(+), 40 deletions(-) diff --git a/docs/CLUSTER_RECOVERY.md b/docs/CLUSTER_RECOVERY.md index 376fdfc..9178d4c 100644 --- a/docs/CLUSTER_RECOVERY.md +++ b/docs/CLUSTER_RECOVERY.md @@ -1575,7 +1575,7 @@ curl -k https://localhost:8006 ```bash # 1. Stop everything -systemctl stop pve-cluster pvedaemon pveproxy pvestatd +systemctl stop pvestatd pveproxy pvedaemon pve-cluster killall pmxcfs 2>/dev/null # 2. Force unmount /etc/pve @@ -1598,7 +1598,7 @@ pvecm status ```bash # 1. Stop and disable services -systemctl stop pve-cluster pvedaemon pveproxy pvestatd +systemctl stop pvestatd pveproxy pvedaemon pve-cluster systemctl disable pve-cluster # 2. Force unmount diff --git a/docs/RESTORE_DIAGRAMS.md b/docs/RESTORE_DIAGRAMS.md index 3dca90a..0f34a35 100644 --- a/docs/RESTORE_DIAGRAMS.md +++ b/docs/RESTORE_DIAGRAMS.md @@ -156,16 +156,15 @@ sequenceDiagram Restore->>Restore: Detect needsClusterRestore = true Note over Restore,Services: Service Stop Phase - Restore->>Services: systemctl stop pve-cluster - Services->>FS: Unmount /etc/pve (FUSE) - Services->>DB: Close file handles - Services-->>Restore: Stopped - - Restore->>Services: systemctl stop pvedaemon + Restore->>Services: systemctl stop pvestatd Services-->>Restore: Stopped Restore->>Services: systemctl stop pveproxy Services-->>Restore: Stopped - Restore->>Services: systemctl stop pvestatd + Restore->>Services: systemctl stop pvedaemon + Services-->>Restore: Stopped + Restore->>Services: systemctl stop pve-cluster + Services->>FS: Unmount /etc/pve (FUSE) + Services->>DB: Close file handles Services-->>Restore: Stopped Note over Restore,FS: Unmount Phase @@ -281,15 +280,15 @@ stateDiagram-v2 Running --> Stopping: User initiates restore state Stopping { - [*] --> StopCluster - StopCluster: systemctl stop pve-cluster - StopCluster --> StopDaemon - StopDaemon: systemctl stop pvedaemon - StopDaemon --> StopProxy - StopProxy: systemctl stop pveproxy - StopProxy --> StopStatd + [*] --> StopStatd StopStatd: systemctl stop pvestatd - StopStatd --> UnmountPVE + StopStatd --> StopProxy + StopProxy: systemctl stop pveproxy + StopProxy --> StopDaemon + StopDaemon: systemctl stop pvedaemon + StopDaemon --> StopCluster + StopCluster: systemctl stop pve-cluster + StopCluster --> UnmountPVE UnmountPVE: umount /etc/pve UnmountPVE --> [*] } diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 5c0e370..2b6468a 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -916,10 +916,10 
@@ Before Restore: └─────────────┘ Stop Phase: - systemctl stop pve-cluster ← /etc/pve unmounted - systemctl stop pvedaemon - systemctl stop pveproxy systemctl stop pvestatd + systemctl stop pveproxy + systemctl stop pvedaemon + systemctl stop pve-cluster ← /etc/pve unmounted umount /etc/pve (if needed) Restore Phase: @@ -2486,12 +2486,12 @@ Use Ctrl+C carefully - wait for current file to finish. **Q: How do I rollback a failed restore?** A: Use the safety backup: -```bash -# Stop services (if cluster restore) -systemctl stop pve-cluster pvedaemon pveproxy pvestatd + ```bash + # Stop services (if cluster restore) + systemctl stop pvestatd pveproxy pvedaemon pve-cluster -# Extract safety backup -tar -xzf /tmp/proxsave/restore_backup_*.tar.gz -C / + # Extract safety backup + tar -xzf /tmp/proxsave/restore_backup_*.tar.gz -C / # Restart services systemctl restart pve-cluster pvedaemon pveproxy pvestatd diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index cb72463..06ad2bc 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -36,6 +36,7 @@ var ( serviceRetryDelay = 500 * time.Millisecond restoreLogSequence uint64 restoreGlob = filepath.Glob + pveClusterServices = [...]string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} ) // RestoreAbortInfo contains information about an aborted restore with network rollback. @@ -141,8 +142,8 @@ func checkZFSPoolsAfterRestore(logger *logging.Logger) error { } func stopPVEClusterServices(ctx context.Context, logger *logging.Logger) error { - services := []string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} - for _, service := range services { + for i := len(pveClusterServices) - 1; i >= 0; i-- { + service := pveClusterServices[i] if err := stopServiceWithRetries(ctx, logger, service); err != nil { return fmt.Errorf("failed to stop PVE services (%s): %w", service, err) } @@ -151,8 +152,7 @@ func stopPVEClusterServices(ctx context.Context, logger *logging.Logger) error { } func startPVEClusterServices(ctx context.Context, logger *logging.Logger) error { - services := []string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} - for _, service := range services { + for _, service := range pveClusterServices { if err := startServiceWithRetries(ctx, logger, service); err != nil { return fmt.Errorf("failed to start PVE services (%s): %w", service, err) } diff --git a/internal/orchestrator/restore_errors_test.go b/internal/orchestrator/restore_errors_test.go index cad33a8..35aa9e7 100644 --- a/internal/orchestrator/restore_errors_test.go +++ b/internal/orchestrator/restore_errors_test.go @@ -191,23 +191,30 @@ func TestStopPVEClusterServices_UsesNoBlock(t *testing.T) { t.Fatalf("expected success stopping PVE services, got %v", err) } - wantStops := []string{ - "systemctl stop --no-block pve-cluster", - "systemctl stop --no-block pvedaemon", - "systemctl stop --no-block pveproxy", + wantStopOrder := []string{ "systemctl stop --no-block pvestatd", + "systemctl stop --no-block pveproxy", + "systemctl stop --no-block pvedaemon", + "systemctl stop --no-block pve-cluster", } - for _, cmd := range wantStops { - found := false - for _, call := range fake.Calls { - if call == cmd { - found = true - break + indexOfCall := func(calls []string, want string) int { + for i, call := range calls { + if call == want { + return i } } - if !found { + return -1 + } + prevIdx := -1 + for _, cmd := range wantStopOrder { + idx := indexOfCall(fake.Calls, cmd) + if idx < 0 { t.Fatalf("expected %s to be called, 
calls: %#v", cmd, fake.Calls) } + if idx <= prevIdx { + t.Fatalf("expected %s to be called after previous stop command, calls: %#v", cmd, fake.Calls) + } + prevIdx = idx } } From 009d675294d822e0748ecfcee3d772420ea46483 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 02:47:51 +0100 Subject: [PATCH 16/24] Apply PBS API configs in final staged phase Introduce a dedicated API-backed staged apply path for PBS: maybeApplyPBSConfigsViaAPIFromStage. File-based PBS configs are applied while PBS services remain stopped; API-backed categories (node, datastores, remotes, jobs, notifications) are applied in a final API phase which may temporarily start services and will attempt to stop them again after completion. Added fallback handling for merge/clean behaviors, improved error logging, and consolidated file-fallbacks. Update restore workflow to invoke the final PBS API staged apply and move PBS notifications to be applied as part of that phase. Add unit test (pbs_service_restart_order_test.go) to ensure services are not restarted during file-based staged apply and are started during the API phase. Also update docs to describe the new staged-API ordering. --- docs/RESTORE_TECHNICAL.md | 5 +- .../pbs_service_restart_order_test.go | 75 +++++ internal/orchestrator/pbs_staged_apply.go | 293 +++++++++++------- .../orchestrator/restore_notifications.go | 27 +- internal/orchestrator/restore_workflow_ui.go | 7 + 5 files changed, 262 insertions(+), 145 deletions(-) create mode 100644 internal/orchestrator/pbs_service_restart_order_test.go diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index 98340b2..d8f3bb8 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -879,6 +879,7 @@ After extraction, **staged categories** are applied from the staging directory u - **Merge**: create/update only (no deletions of existing objects not in the backup). - **Clean 1:1**: attempts 1:1 reconciliation (may remove objects not present in the backup). - If API apply is unavailable or fails, ProxSave may fall back to applying staged `*.cfg` files back to `/etc/proxmox-backup` (**Clean 1:1 only**). +- PBS services are kept stopped while file-based staged configs are written; API-backed categories are applied in a final API phase with services started temporarily. **Current PBS API coverage**: - `pbs_host`: node + traffic control @@ -890,8 +891,8 @@ After extraction, **staged categories** are applied from the staging directory u Other PBS categories remain file-based (e.g. access control, tape, proxy/ACME/metricserver). 
**Key code paths**: -- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`) -- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pbs_notifications`) +- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`, `maybeApplyPBSConfigsViaAPIFromStage`) +- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pve_notifications`) - `internal/orchestrator/pbs_api_apply.go` / `internal/orchestrator/pbs_notifications_api_apply.go` (API apply engines) ## Category System diff --git a/internal/orchestrator/pbs_service_restart_order_test.go b/internal/orchestrator/pbs_service_restart_order_test.go new file mode 100644 index 0000000..f6999bf --- /dev/null +++ b/internal/orchestrator/pbs_service_restart_order_test.go @@ -0,0 +1,75 @@ +package orchestrator + +import ( + "context" + "errors" + "os" + "slices" + "testing" +) + +func TestPBSServicesNotRestartedDuringFileBasedStagedApply(t *testing.T) { + if os.Geteuid() != 0 { + t.Skip("requires root to exercise staged apply code paths") + } + + origFS := restoreFS + origCmd := restoreCmd + t.Cleanup(func() { + restoreFS = origFS + restoreCmd = origCmd + }) + + restoreFS = osFS{} + fakeCmd := &FakeCommandRunner{} + restoreCmd = fakeCmd + + stageRoot := t.TempDir() + logger := newTestLogger() + plan := &RestorePlan{ + SystemType: SystemTypePBS, + StagedCategories: []Category{ + {ID: "pbs_host", Type: CategoryTypePBS}, + {ID: "pbs_notifications", Type: CategoryTypePBS}, + }, + PBSRestoreBehavior: PBSRestoreBehaviorMerge, + } + + if err := maybeApplyPBSConfigsFromStage(context.Background(), logger, plan, stageRoot, false); err != nil { + t.Fatalf("maybeApplyPBSConfigsFromStage error: %v", err) + } + if len(fakeCmd.Calls) != 0 { + t.Fatalf("expected no commands during file-based staged apply, got %v", fakeCmd.Calls) + } + + if err := maybeApplyNotificationsFromStage(context.Background(), logger, plan, stageRoot, false); err != nil { + t.Fatalf("maybeApplyNotificationsFromStage error: %v", err) + } + if len(fakeCmd.Calls) != 0 { + t.Fatalf("expected no commands during notifications staged apply on PBS, got %v", fakeCmd.Calls) + } + + // Allow the temporary stop at the end of API apply to complete quickly. 
+ if fakeCmd.Outputs == nil { + fakeCmd.Outputs = make(map[string][]byte) + } + if fakeCmd.Errors == nil { + fakeCmd.Errors = make(map[string]error) + } + for _, svc := range []string{"proxmox-backup-proxy", "proxmox-backup"} { + key := "systemctl is-active " + svc + fakeCmd.Outputs[key] = []byte("inactive\n") + fakeCmd.Errors[key] = errors.New("exit status 3") + } + + if err := maybeApplyPBSConfigsViaAPIFromStage(context.Background(), logger, plan, stageRoot, false, true); err != nil { + t.Fatalf("maybeApplyPBSConfigsViaAPIFromStage error: %v", err) + } + + if !slices.Contains(fakeCmd.Calls, "systemctl start proxmox-backup") { + t.Fatalf("expected PBS service start during API phase, calls=%v", fakeCmd.Calls) + } + if !slices.Contains(fakeCmd.Calls, "systemctl stop --no-block proxmox-backup-proxy") { + t.Fatalf("expected PBS service stop after API phase, calls=%v", fakeCmd.Calls) + } +} diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index ee80a5d..6ee4b5b 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/tis24dev/proxsave/internal/logging" ) @@ -40,28 +41,125 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, return nil } + behavior := plan.PBSRestoreBehavior + + if plan.HasCategoryID("pbs_host") { + // Restore file-only configs (no stable API coverage yet). + // ACME should be applied before node config (node.cfg references ACME accounts/plugins). + for _, rel := range []string{ + "etc/proxmox-backup/acme/accounts.cfg", + "etc/proxmox-backup/acme/plugins.cfg", + "etc/proxmox-backup/metricserver.cfg", + "etc/proxmox-backup/proxy.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } + } + + if plan.HasCategoryID("pbs_tape") { + if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: tape configs: %v", err) + } + } + + if behavior == PBSRestoreBehaviorUnspecified { + logging.DebugStep(logger, "pbs staged apply", "PBS restore behavior is unspecified; API-backed PBS categories (node/datastores/remotes/jobs/notifications) will be applied in the final staged API phase") + } + + return nil +} + +func maybeApplyPBSConfigsViaAPIFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool, stopServicesAfter bool) (err error) { + if plan == nil || plan.SystemType != SystemTypePBS { + return nil + } + if strings.TrimSpace(stageRoot) == "" { + logging.DebugStep(logger, "pbs staged apply (api)", "Skipped: staging directory not available") + return nil + } + // API-backed PBS categories. 
+ if !plan.HasCategoryID("pbs_host") && + !plan.HasCategoryID("datastore_pbs") && + !plan.HasCategoryID("pbs_remotes") && + !plan.HasCategoryID("pbs_jobs") && + !plan.HasCategoryID("pbs_notifications") { + return nil + } + + done := logging.DebugStart(logger, "pbs staged apply (api)", "dryRun=%v stage=%s", dryRun, stageRoot) + defer func() { done(err) }() + + if dryRun { + logger.Info("Dry run enabled: skipping staged PBS API apply") + return nil + } + if !isRealRestoreFS(restoreFS) { + logger.Debug("Skipping staged PBS API apply: non-system filesystem in use") + return nil + } + if os.Geteuid() != 0 { + logger.Warning("Skipping staged PBS API apply: requires root privileges") + return nil + } + behavior := plan.PBSRestoreBehavior strict := behavior == PBSRestoreBehaviorClean allowFileFallback := behavior == PBSRestoreBehaviorClean - needsAPI := plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs") - apiAvailable := false - var apiUnavailableErr error - if needsAPI { - if apiErr := ensurePBSServicesForAPI(ctx, logger); apiErr != nil { - apiUnavailableErr = apiErr - if allowFileFallback { - logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", apiErr) - } else { - logging.DebugStep(logger, "pbs staged apply", "PBS API apply unavailable; merge mode will skip API-applied PBS categories: %v", apiErr) + ensureAttempted := false + defer func() { + if !stopServicesAfter || !ensureAttempted { + return + } + stopCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStopTimeout+2*serviceVerifyTimeout+10*time.Second) + defer cancel() + if stopErr := stopPBSServices(stopCtx, logger); stopErr != nil { + logger.Warning("Failed to stop PBS services after staged API apply: %v", stopErr) + } + }() + + ensureAttempted = true + if apiErr := ensurePBSServicesForAPI(ctx, logger); apiErr != nil { + if allowFileFallback { + logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", apiErr) + if plan.HasCategoryID("pbs_host") { + for _, rel := range []string{ + "etc/proxmox-backup/traffic-control.cfg", + "etc/proxmox-backup/node.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } } - } else { - apiAvailable = true + if plan.HasCategoryID("datastore_pbs") { + if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: s3.cfg: %v", err) + } + if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: datastore.cfg: %v", err) + } + } + if plan.HasCategoryID("pbs_remotes") { + if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: remote.cfg: %v", err) + } + } + if plan.HasCategoryID("pbs_jobs") { + if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: job configs: %v", err) + } + } + if plan.HasCategoryID("pbs_notifications") { + if err := applyPBSNotificationsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: notifications: %v", err) + } + } + return nil } - } - var mergeAPISkipErr error - if needsAPI && !apiAvailable && !allowFileFallback && apiUnavailableErr != nil { var skipped []string if plan.HasCategoryID("pbs_host") { skipped = append(skipped, "pbs_host 
(node.cfg/traffic-control.cfg)") @@ -75,140 +173,99 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, if plan.HasCategoryID("pbs_jobs") { skipped = append(skipped, "pbs_jobs") } + if plan.HasCategoryID("pbs_notifications") { + skipped = append(skipped, "pbs_notifications") + } if len(skipped) == 0 { skipped = append(skipped, "PBS API categories") } - mergeAPISkipErr = fmt.Errorf("PBS API apply unavailable in %s; skipped %s: %w", behavior.DisplayName(), strings.Join(skipped, ", "), apiUnavailableErr) + return fmt.Errorf("PBS API apply unavailable in %s; skipped %s: %w", behavior.DisplayName(), strings.Join(skipped, ", "), apiErr) } if plan.HasCategoryID("pbs_host") { - // Always restore file-only configs (no stable API coverage yet). - // ACME should be applied before node config (node.cfg references ACME accounts/plugins). - for _, rel := range []string{ - "etc/proxmox-backup/acme/accounts.cfg", - "etc/proxmox-backup/acme/plugins.cfg", - "etc/proxmox-backup/metricserver.cfg", - "etc/proxmox-backup/proxy.cfg", - } { - if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { - logger.Warning("PBS staged apply: %s: %v", rel, err) + if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: traffic-control failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") } } - - if apiAvailable { - if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: traffic-control failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") - _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") - } - } - if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { - logger.Warning("PBS API apply: node config failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based node.cfg") - _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") - } - } - } else if allowFileFallback { - for _, rel := range []string{ - "etc/proxmox-backup/traffic-control.cfg", - "etc/proxmox-backup/node.cfg", - } { - if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { - logger.Warning("PBS staged apply: %s: %v", rel, err) - } + if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { + logger.Warning("PBS API apply: node config failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based node.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") } - } else { - logging.DebugStep(logger, "pbs staged apply", "Skipping node.cfg/traffic-control.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("datastore_pbs") { - if apiAvailable { - if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: s3.cfg failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based s3.cfg") - _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) - } - } - if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: datastore.cfg failed: %v", err) - if 
allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") - _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) - } - } - } else if allowFileFallback { - if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: s3.cfg: %v", err) + if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: s3.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based s3.cfg") + _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) } - if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: datastore.cfg: %v", err) + } + if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: datastore.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") + _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) } - } else { - logging.DebugStep(logger, "pbs staged apply", "Skipping datastore.cfg/s3.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_remotes") { - if apiAvailable { - if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: remote.cfg failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based remote.cfg") - _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) - } - } - } else if allowFileFallback { - if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: remote.cfg: %v", err) + if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: remote.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based remote.cfg") + _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) } - } else { - logging.DebugStep(logger, "pbs staged apply", "Skipping remote.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_jobs") { - if apiAvailable { - if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: sync jobs failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based job configs") - _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } - } - if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: verification jobs failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based job configs") - _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } - } - if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: prune jobs failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based job configs") - _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } + fallbackApplied := false + applyFallback := func() { + if !allowFileFallback || fallbackApplied { + return } - } else if allowFileFallback { - if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: job configs: %v", err) - } - } else { - logging.DebugStep(logger, "pbs staged apply", "Skipping sync/verification/prune configs: 
merge mode requires PBS API apply") + fallbackApplied = true + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + + if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: sync jobs failed: %v", err) + applyFallback() + } + if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: verification jobs failed: %v", err) + applyFallback() + } + if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: prune jobs failed: %v", err) + applyFallback() } } - if plan.HasCategoryID("pbs_tape") { - if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: tape configs: %v", err) + if plan.HasCategoryID("pbs_notifications") { + if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS notifications API apply failed: %v", err) + if allowFileFallback { + logger.Warning("PBS notifications API apply failed; falling back to file-based apply") + _ = applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } else { + logger.Warning("PBS notifications API apply failed; skipping apply (merge mode)") + } + } else { + logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) } } - return mergeAPISkipErr + return nil } func applyPBSRemoteCfgFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { diff --git a/internal/orchestrator/restore_notifications.go b/internal/orchestrator/restore_notifications.go index 3774a5d..6c30459 100644 --- a/internal/orchestrator/restore_notifications.go +++ b/internal/orchestrator/restore_notifications.go @@ -53,31 +53,8 @@ func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logge switch plan.SystemType { case SystemTypePBS: - if !plan.HasCategoryID("pbs_notifications") { - return nil - } - behavior := plan.PBSRestoreBehavior - strict := behavior == PBSRestoreBehaviorClean - allowFileFallback := behavior == PBSRestoreBehaviorClean - - if err := ensurePBSServicesForAPI(ctx, logger); err != nil { - if allowFileFallback { - logger.Warning("PBS notifications API apply unavailable; falling back to file-based apply: %v", err) - return applyPBSNotificationsFromStage(ctx, logger, stageRoot) - } - logger.Warning("PBS notifications API apply unavailable; skipping apply (merge mode): %v", err) - return nil - } - - if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { - if allowFileFallback { - logger.Warning("PBS notifications API apply failed; falling back to file-based apply: %v", err) - return applyPBSNotificationsFromStage(ctx, logger, stageRoot) - } - logger.Warning("PBS notifications API apply failed; skipping apply (merge mode): %v", err) - return nil - } - logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) + // PBS notification restore is applied as part of the final PBS API staged apply phase + // to avoid restarting PBS services before other file-based staged apply steps complete. 
return nil case SystemTypePVE: if !plan.HasCategoryID("pve_notifications") { diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index d57f262..1853921 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -602,6 +602,13 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l restoreHadWarnings = true logger.Warning("Notifications staged apply: %v", err) } + if err := maybeApplyPBSConfigsViaAPIFromStage(ctx, logger, plan, stageRoot, cfg.DryRun, pbsServicesStopped); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("PBS staged API apply: %v", err) + } } stageRootForNetworkApply := stageRoot From 346dea06c15988881608537421df9157a72d066d Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 10:03:55 +0100 Subject: [PATCH 17/24] Stream and cancel AnalyzeBackupCategories Change AnalyzeBackupCategories to accept a context and scan archives streaming (O(1) memory), allowing cancellation and robust error handling. Avoid double-closing underlying files from decompression readers and detect category availability on-the-fly. Expose analyzeBackupCategoriesFunc to allow injection and fall back to a safe full restore with a user-facing message when analysis fails. Update collectArchivePaths to return errors and update tests: add a truncated-tar read error test and a workflow test that verifies fallback behavior; adjust existing tests to the new API and behavior. Misc: add required imports and minor housekeeping. --- internal/orchestrator/restore_errors_test.go | 42 ++++++- .../orchestrator/restore_workflow_test.go | 106 ++++++++++++++++++ internal/orchestrator/restore_workflow_ui.go | 68 +++++++---- internal/orchestrator/selective.go | 75 ++++++++++--- .../orchestrator/selective_additional_test.go | 5 +- 5 files changed, 258 insertions(+), 38 deletions(-) diff --git a/internal/orchestrator/restore_errors_test.go b/internal/orchestrator/restore_errors_test.go index 35aa9e7..17a0ea0 100644 --- a/internal/orchestrator/restore_errors_test.go +++ b/internal/orchestrator/restore_errors_test.go @@ -26,12 +26,52 @@ func TestAnalyzeBackupCategories_OpenError(t *testing.T) { restoreFS = fakeFS logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - _, err := AnalyzeBackupCategories("/missing/archive.tar", logger) + _, err := AnalyzeBackupCategories(context.Background(), "/missing/archive.tar", logger) if err == nil { t.Fatalf("expected error when archive cannot be opened") } } +func TestAnalyzeBackupCategories_TarReadError(t *testing.T) { + orig := restoreFS + defer func() { restoreFS = orig }() + fakeFS := NewFakeFS() + defer func() { _ = os.RemoveAll(fakeFS.Root) }() + restoreFS = fakeFS + logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) + + payload := bytes.Repeat([]byte("a"), 2048) + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + if err := tw.WriteHeader(&tar.Header{Name: "etc/hosts", Mode: 0o644, Size: int64(len(payload))}); err != nil { + t.Fatalf("WriteHeader: %v", err) + } + if _, err := tw.Write(payload); err != nil { + t.Fatalf("Write: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + + full := buf.Bytes() + if len(full) < 700 { + t.Fatalf("unexpected tar size: %d", len(full)) + } + truncated := full[:700] + + if err := fakeFS.AddFile("/broken.tar", 
truncated); err != nil { + t.Fatalf("AddFile: %v", err) + } + + _, err := AnalyzeBackupCategories(context.Background(), "/broken.tar", logger) + if err == nil { + t.Fatalf("expected error for truncated tar archive") + } + if !strings.Contains(err.Error(), "read archive entries") { + t.Fatalf("unexpected error: %v", err) + } +} + func TestRunRestoreCommandStream_UsesStreamingRunner(t *testing.T) { orig := restoreCmd defer func() { restoreCmd = orig }() diff --git a/internal/orchestrator/restore_workflow_test.go b/internal/orchestrator/restore_workflow_test.go index e6d836f..f4af40f 100644 --- a/internal/orchestrator/restore_workflow_test.go +++ b/internal/orchestrator/restore_workflow_test.go @@ -3,6 +3,7 @@ package orchestrator import ( "archive/tar" "context" + "errors" "os" "path/filepath" "testing" @@ -154,3 +155,108 @@ func TestRunRestoreWorkflow_ConfirmFalseAborts(t *testing.T) { t.Fatalf("err=%v; want %v", err, ErrRestoreAborted) } } + +func TestRunRestoreWorkflow_AnalysisFailure_FallsBackToSafeFullRestore(t *testing.T) { + origRestoreFS := restoreFS + origRestoreCmd := restoreCmd + origRestoreSystem := restoreSystem + origRestoreTime := restoreTime + origCompatFS := compatFS + origPrepare := prepareRestoreBundleFunc + origAnalyze := analyzeBackupCategoriesFunc + origSafetyFS := safetyFS + origSafetyNow := safetyNow + t.Cleanup(func() { + restoreFS = origRestoreFS + restoreCmd = origRestoreCmd + restoreSystem = origRestoreSystem + restoreTime = origRestoreTime + compatFS = origCompatFS + prepareRestoreBundleFunc = origPrepare + analyzeBackupCategoriesFunc = origAnalyze + safetyFS = origSafetyFS + safetyNow = origSafetyNow + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + compatFS = fakeFS + safetyFS = fakeFS + + fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} + restoreTime = fakeNow + safetyNow = fakeNow.Now + + // Make compatibility detection treat this as PBS (to avoid compatibility prompts). 
+ if err := fakeFS.AddDir("/etc/proxmox-backup"); err != nil { + t.Fatalf("fakeFS.AddDir: %v", err) + } + restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} + + restoreCmd = &FakeCommandRunner{ + Outputs: map[string][]byte{ + "ip route show default": []byte(""), + }, + Errors: map[string]error{}, + } + + tmpTar := filepath.Join(t.TempDir(), "bundle.tar") + if err := writeTarFile(tmpTar, map[string]string{ + "etc/hosts": "127.0.0.1 localhost\n", + }); err != nil { + t.Fatalf("writeTarFile: %v", err) + } + tarBytes, err := os.ReadFile(tmpTar) + if err != nil { + t.Fatalf("ReadFile tar: %v", err) + } + if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile: %v", err) + } + + prepareRestoreBundleFunc = func(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { + cand := &decryptCandidate{ + DisplayBase: "test", + Manifest: &backup.Manifest{ + CreatedAt: fakeNow.Now(), + ClusterMode: "standalone", + ProxmoxType: "pbs", + ScriptVersion: "vtest", + }, + } + prepared := &preparedBundle{ + ArchivePath: "/bundle.tar", + Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, + cleanup: func() {}, + } + return cand, prepared, nil + } + + analyzeBackupCategoriesFunc = func(ctx context.Context, archivePath string, logger *logging.Logger) ([]Category, error) { + return nil, errors.New("simulated analysis failure") + } + + logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) + cfg := &config.Config{BaseDir: "/base"} + ui := &fakeRestoreWorkflowUI{ + confirmRestore: true, + modeErr: errors.New("unexpected SelectRestoreMode call"), + } + + if err := runRestoreWorkflowWithUI(context.Background(), cfg, logger, "vtest", ui); err != nil { + t.Fatalf("runRestoreWorkflowWithUI error: %v", err) + } + + data, err := fakeFS.ReadFile("/tmp/proxsave/restore_backup_location.txt") + if err != nil { + t.Fatalf("expected safety backup location file: %v", err) + } + want := "/tmp/proxsave/restore_backup_20200102_030405.tar.gz" + if got := string(data); got != want { + t.Fatalf("restore_backup_location.txt=%q want %q", got, want) + } + if _, err := fakeFS.Stat(want); err != nil { + t.Fatalf("expected safety backup archive %s to exist: %v", want, err) + } +} diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index 1853921..5244f6c 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -17,6 +17,7 @@ import ( ) var prepareRestoreBundleFunc = prepareRestoreBundleWithUI +var analyzeBackupCategoriesFunc = AnalyzeBackupCategories func prepareRestoreBundleWithUI(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { candidate, err := selectBackupCandidateWithUI(ctx, ui, cfg, logger, false) @@ -92,42 +93,63 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } logger.Info("Analyzing backup contents...") - availableCategories, err := AnalyzeBackupCategories(prepared.ArchivePath, logger) - if err != nil { - logger.Warning("Could not analyze categories: %v", err) - logger.Info("Falling back to full restore mode") - return runFullRestoreWithUI(ctx, ui, candidate, prepared, destRoot, logger, cfg.DryRun) - } + availableCategories, analysisErr := analyzeBackupCategoriesFunc(ctx, prepared.ArchivePath, logger) var ( mode RestoreMode 
selectedCategories []Category ) - for { - mode, err = ui.SelectRestoreMode(ctx, systemType) - if err != nil { + + if analysisErr != nil { + logger.Warning("Backup category analysis failed: %v", analysisErr) + if err := ui.ShowMessage(ctx, "Safe full restore (analysis unavailable)", + "Backup category analysis failed.\n\n"+ + "ProxSave will proceed with a SAFE full restore.\n\n"+ + "Safety features remain enabled (staging, transactional apply with rollback timers, and safety backups).\n\n"+ + "Note: Categories missing from the archive will be skipped automatically."); err != nil { return err } - if mode != RestoreModeCustom { - selectedCategories = GetCategoriesForMode(mode, systemType, availableCategories) - break + mode = RestoreModeFull + + backupType := DetectBackupType(candidate.Manifest) + switch backupType { + case SystemTypePVE, SystemTypePBS: + availableCategories = GetCategoriesForSystem(string(backupType)) + if len(availableCategories) == 0 { + availableCategories = GetAllCategories() + } + default: + availableCategories = GetAllCategories() } + selectedCategories = append([]Category{}, availableCategories...) + } else { + for { + mode, err = ui.SelectRestoreMode(ctx, systemType) + if err != nil { + return err + } - selectedCategories, err = ui.SelectCategories(ctx, availableCategories, systemType) - if err != nil { - if errors.Is(err, errRestoreBackToMode) { - continue + if mode != RestoreModeCustom { + selectedCategories = GetCategoriesForMode(mode, systemType, availableCategories) + break } - return err + + selectedCategories, err = ui.SelectCategories(ctx, availableCategories, systemType) + if err != nil { + if errors.Is(err, errRestoreBackToMode) { + continue + } + return err + } + break } - break - } - if mode == RestoreModeCustom { - selectedCategories, err = maybeAddRecommendedCategoriesForTFA(ctx, ui, logger, selectedCategories, availableCategories) - if err != nil { - return err + if mode == RestoreModeCustom { + selectedCategories, err = maybeAddRecommendedCategoriesForTFA(ctx, ui, logger, selectedCategories, availableCategories) + if err != nil { + return err + } } } diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index a6a4a61..733e1bc 100644 --- a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "io" "os" "path" "sort" @@ -25,12 +26,17 @@ type SelectiveRestoreConfig struct { Metadata *backup.Manifest } -// AnalyzeBackupCategories detects which categories are available in the backup -func AnalyzeBackupCategories(archivePath string, logger *logging.Logger) (categories []Category, err error) { +// AnalyzeBackupCategories detects which categories are available in the backup. +// It scans the archive streamingly (O(1) memory) and supports cancellation via ctx. 
+func AnalyzeBackupCategories(ctx context.Context, archivePath string, logger *logging.Logger) (categories []Category, err error) { done := logging.DebugStart(logger, "analyze backup categories", "archive=%s", archivePath) defer func() { done(err) }() logger.Info("Analyzing backup categories...") + if ctx == nil { + ctx = context.Background() + } + // Open the archive and read all entry names file, err := restoreFS.Open(archivePath) if err != nil { @@ -39,22 +45,63 @@ func AnalyzeBackupCategories(archivePath string, logger *logging.Logger) (catego defer file.Close() // Create appropriate reader based on compression - reader, err := createDecompressionReader(context.Background(), file, archivePath) + reader, err := createDecompressionReader(ctx, file, archivePath) if err != nil { return nil, err } - defer func() { - if closer, ok := reader.(interface{ Close() error }); ok { - closer.Close() + if closer, ok := reader.(interface{ Close() error }); ok { + // When the archive isn't compressed, createDecompressionReader returns the same *os.File. + // Avoid double-closing; the underlying file is already closed by the defer above. + if rf, ok := reader.(*os.File); !ok || rf != file { + defer closer.Close() } - }() + } tarReader := tar.NewReader(reader) - archivePaths := collectArchivePaths(tarReader) - logger.Debug("Found %d entries in archive", len(archivePaths)) + allCategories := GetAllCategories() + if len(allCategories) == 0 { + return nil, nil + } + found := make([]bool, len(allCategories)) + foundCount := 0 + entriesScanned := 0 +scanLoop: + for { + if err := ctx.Err(); err != nil { + return nil, err + } + header, err := tarReader.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("read archive entries: %w", err) + } + entriesScanned++ - availableCategories := AnalyzeArchivePaths(archivePaths, GetAllCategories()) + for i := range allCategories { + if found[i] { + continue + } + if archiveEntryMatchesCategory(header.Name, allCategories[i]) { + found[i] = true + allCategories[i].IsAvailable = true + foundCount++ + if foundCount == len(allCategories) { + break scanLoop + } + } + } + } + logger.Debug("Scanned %d entries in archive", entriesScanned) + + availableCategories := make([]Category, 0, foundCount) + for i, cat := range allCategories { + if found[i] { + availableCategories = append(availableCategories, cat) + } + } for _, cat := range availableCategories { logger.Debug("Category available: %s (%s)", cat.ID, cat.Name) } @@ -93,16 +140,18 @@ func AnalyzeArchivePaths(archivePaths []string, allCategories []Category) []Cate return availableCategories } -func collectArchivePaths(tarReader *tar.Reader) []string { +func collectArchivePaths(tarReader *tar.Reader) ([]string, error) { var archivePaths []string for { header, err := tarReader.Next() if err != nil { - break // EOF or error + if errors.Is(err, io.EOF) { + return archivePaths, nil + } + return archivePaths, err } archivePaths = append(archivePaths, header.Name) } - return archivePaths } // pathMatchesPattern checks if an archive path matches a category pattern diff --git a/internal/orchestrator/selective_additional_test.go b/internal/orchestrator/selective_additional_test.go index e6fac52..e40256f 100644 --- a/internal/orchestrator/selective_additional_test.go +++ b/internal/orchestrator/selective_additional_test.go @@ -42,7 +42,10 @@ func TestCollectArchivePaths(t *testing.T) { } tr := tar.NewReader(bytes.NewReader(buf.Bytes())) - got := collectArchivePaths(tr) + got, err := 
collectArchivePaths(tr) + if err != nil { + t.Fatalf("collectArchivePaths() error: %v", err) + } if len(got) != len(entries) { t.Fatalf("collectArchivePaths() len=%d; want %d (paths=%v)", len(got), len(entries), got) From 6a51e09416caad707f10c36eb20497e5837af0a2 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 19:52:18 +0100 Subject: [PATCH 18/24] Improve compatibility, staging and firewall cleanup Refine compatibility detection and messages, enhance staging directory creation, and ensure firewall rollback cleans up scripts. - ValidateCompatibility: clearer error/warning messages when current system or backup type cannot be detected; include backup/system type info where available and adjust tests. - Tests: add tests for unknown backup type detection, firewall rollback script removal, and staging directory uniqueness; update restore manifest tests to include ProxmoxType. - FakeFS: allow onDisk to accept already-mapped paths so helpers returning OS paths work with the fake FS. - Staging: remove atomic sequence variable and switch to using MkdirTemp with a timestamp/pid pattern to create unique staging dirs; update tests accordingly. - Firewall rollback: remove rollback script file when disarming and add corresponding test; log failures without failing the flow. These changes improve robustness of restore operations and make tests more reliable. --- internal/orchestrator/compatibility.go | 10 +++--- internal/orchestrator/compatibility_test.go | 20 ++++++++++++ internal/orchestrator/deps_test.go | 6 ++++ internal/orchestrator/restore_firewall.go | 6 ++++ .../orchestrator/restore_firewall_test.go | 29 +++++++++++++++++ .../orchestrator/restore_workflow_test.go | 2 ++ internal/orchestrator/staging.go | 16 +++------- internal/orchestrator/staging_test.go | 31 ++++++++++++++++--- 8 files changed, 100 insertions(+), 20 deletions(-) diff --git a/internal/orchestrator/compatibility.go b/internal/orchestrator/compatibility.go index cc2eb3f..433079b 100644 --- a/internal/orchestrator/compatibility.go +++ b/internal/orchestrator/compatibility.go @@ -70,14 +70,16 @@ func ValidateCompatibility(manifest *backup.Manifest) error { currentSystem := DetectCurrentSystem() backupType := DetectBackupType(manifest) - // If we can't detect either, issue a warning but allow + // If we can't detect the system types, issue a warning but allow. 
if currentSystem == SystemTypeUnknown { - return fmt.Errorf("warning: cannot detect current system type - restoration may fail") + if backupType == SystemTypeUnknown { + return fmt.Errorf("warning: cannot detect current system type nor backup type - compatibility cannot be validated") + } + return fmt.Errorf("warning: cannot detect current system type (backup appears to be %s) - restoration may fail", strings.ToUpper(string(backupType))) } if backupType == SystemTypeUnknown { - // If backup type is unknown, we can't validate - issue warning - return nil // Allow but warn in calling code + return fmt.Errorf("warning: cannot detect backup type from manifest (current system is %s) - compatibility cannot be validated", strings.ToUpper(string(currentSystem))) } // Check for incompatibility diff --git a/internal/orchestrator/compatibility_test.go b/internal/orchestrator/compatibility_test.go index 5c7ef98..f4c356a 100644 --- a/internal/orchestrator/compatibility_test.go +++ b/internal/orchestrator/compatibility_test.go @@ -2,6 +2,7 @@ package orchestrator import ( "os" + "strings" "testing" "github.com/tis24dev/proxsave/internal/backup" @@ -24,6 +25,25 @@ func TestValidateCompatibility_Mismatch(t *testing.T) { } } +func TestValidateCompatibility_UnknownBackupTypeWarns(t *testing.T) { + orig := compatFS + defer func() { compatFS = orig }() + + fake := NewFakeFS() + defer func() { _ = os.RemoveAll(fake.Root) }() + compatFS = fake + if err := fake.AddDir("/etc/pve"); err != nil { + t.Fatalf("add dir: %v", err) + } + + manifest := &backup.Manifest{} + if err := ValidateCompatibility(manifest); err == nil { + t.Fatalf("expected warning for unknown backup type") + } else if !strings.Contains(err.Error(), "cannot detect backup type") { + t.Fatalf("unexpected warning: %v", err) + } +} + func TestDetectCurrentSystem_Unknown(t *testing.T) { orig := compatFS defer func() { compatFS = orig }() diff --git a/internal/orchestrator/deps_test.go b/internal/orchestrator/deps_test.go index ba4d404..dc90cc4 100644 --- a/internal/orchestrator/deps_test.go +++ b/internal/orchestrator/deps_test.go @@ -36,6 +36,12 @@ func NewFakeFS() *FakeFS { func (f *FakeFS) onDisk(path string) string { clean := filepath.Clean(path) + // Allow passing already-mapped on-disk paths back into FakeFS methods. + // This is important for helpers that return OS paths (e.g., MkdirTemp/CreateTemp) + // which are then reused by other FS operations in tests. 
+ if clean == f.Root || strings.HasPrefix(clean, f.Root+string(filepath.Separator)) { + return clean + } clean = strings.TrimPrefix(clean, string(filepath.Separator)) return filepath.Join(f.Root, clean) } diff --git a/internal/orchestrator/restore_firewall.go b/internal/orchestrator/restore_firewall.go index 64c7419..4b91b90 100644 --- a/internal/orchestrator/restore_firewall.go +++ b/internal/orchestrator/restore_firewall.go @@ -491,6 +491,12 @@ func disarmFirewallRollback(ctx context.Context, logger *logging.Logger, handle _, _ = restoreCmd.Run(ctx, "systemctl", "stop", timerUnit) _, _ = restoreCmd.Run(ctx, "systemctl", "reset-failed", strings.TrimSpace(handle.unitName)+".service", timerUnit) } + + if strings.TrimSpace(handle.scriptPath) != "" { + if err := restoreFS.Remove(handle.scriptPath); err != nil && !errors.Is(err, os.ErrNotExist) { + logger.Warning("Failed to remove firewall rollback script %s: %v", handle.scriptPath, err) + } + } } func buildFirewallRollbackScript(markerPath, backupPath, logPath string) string { diff --git a/internal/orchestrator/restore_firewall_test.go b/internal/orchestrator/restore_firewall_test.go index 5ce76d5..c1fabf6 100644 --- a/internal/orchestrator/restore_firewall_test.go +++ b/internal/orchestrator/restore_firewall_test.go @@ -1,6 +1,7 @@ package orchestrator import ( + "context" "os" "testing" ) @@ -46,6 +47,34 @@ func TestSyncDirExact_PrunesExtraneousFiles(t *testing.T) { } } +func TestDisarmFirewallRollback_RemovesMarkerAndScript(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + restoreFS = fakeFS + + handle := &firewallRollbackHandle{ + markerPath: "/tmp/proxsave/firewall_rollback_pending_test", + scriptPath: "/tmp/proxsave/firewall_rollback_test.sh", + } + if err := fakeFS.AddFile(handle.markerPath, []byte("pending\n")); err != nil { + t.Fatalf("add marker: %v", err) + } + if err := fakeFS.AddFile(handle.scriptPath, []byte("#!/bin/sh\nexit 0\n")); err != nil { + t.Fatalf("add script: %v", err) + } + + disarmFirewallRollback(context.Background(), newTestLogger(), handle) + + if _, err := fakeFS.Stat(handle.markerPath); err == nil || !os.IsNotExist(err) { + t.Fatalf("expected marker to be removed; stat err=%v", err) + } + if _, err := fakeFS.Stat(handle.scriptPath); err == nil || !os.IsNotExist(err) { + t.Fatalf("expected script to be removed; stat err=%v", err) + } +} + func TestApplyPVEFirewallFromStage_AppliesFirewallAndHostFW(t *testing.T) { origFS := restoreFS t.Cleanup(func() { restoreFS = origFS }) diff --git a/internal/orchestrator/restore_workflow_test.go b/internal/orchestrator/restore_workflow_test.go index f4af40f..f1ee4c2 100644 --- a/internal/orchestrator/restore_workflow_test.go +++ b/internal/orchestrator/restore_workflow_test.go @@ -80,6 +80,7 @@ func TestRunRestoreWorkflow_CustomModeNoCategories_Succeeds(t *testing.T) { Manifest: &backup.Manifest{ CreatedAt: time.Unix(1700000000, 0), ClusterMode: "standalone", + ProxmoxType: "pve", ScriptVersion: "1.0.0", }, } @@ -131,6 +132,7 @@ func TestRunRestoreWorkflow_ConfirmFalseAborts(t *testing.T) { Manifest: &backup.Manifest{ CreatedAt: time.Unix(1700000000, 0), ClusterMode: "standalone", + ProxmoxType: "pve", ScriptVersion: "1.0.0", }, } diff --git a/internal/orchestrator/staging.go b/internal/orchestrator/staging.go index 59a7344..a418f0f 100644 --- a/internal/orchestrator/staging.go +++ b/internal/orchestrator/staging.go @@ -5,14 +5,11 @@ import ( "os" "path/filepath" "strings" - "sync/atomic" "time" 
"github.com/tis24dev/proxsave/internal/logging" ) -var restoreStageSequence uint64 - func isStagedCategoryID(id string) bool { switch strings.TrimSpace(id) { case "network", @@ -51,21 +48,16 @@ func splitRestoreCategories(categories []Category) (normal []Category, staged [] return normal, staged, export } -func stageDestRoot() string { - base := "/tmp/proxsave" - seq := atomic.AddUint64(&restoreStageSequence, 1) - return filepath.Join(base, fmt.Sprintf("restore-stage-%s_pid%d_%d", nowRestore().Format("20060102-150405"), os.Getpid(), seq)) -} - func createRestoreStageDir() (string, error) { base := "/tmp/proxsave" if err := restoreFS.MkdirAll(base, 0o755); err != nil { return "", fmt.Errorf("ensure staging base directory %s: %w", base, err) } - dir := stageDestRoot() - if err := restoreFS.MkdirAll(dir, 0o700); err != nil { - return "", fmt.Errorf("create staging directory %s: %w", dir, err) + pattern := fmt.Sprintf("restore-stage-%s_pid%d-", nowRestore().Format("20060102-150405"), os.Getpid()) + dir, err := restoreFS.MkdirTemp(base, pattern) + if err != nil { + return "", fmt.Errorf("create staging directory under %s: %w", base, err) } return dir, nil } diff --git a/internal/orchestrator/staging_test.go b/internal/orchestrator/staging_test.go index b3e840f..7e6610a 100644 --- a/internal/orchestrator/staging_test.go +++ b/internal/orchestrator/staging_test.go @@ -10,24 +10,21 @@ import ( func TestCreateRestoreStageDir_Creates0700Directory(t *testing.T) { origFS := restoreFS origTime := restoreTime - origSeq := restoreStageSequence t.Cleanup(func() { restoreFS = origFS restoreTime = origTime - restoreStageSequence = origSeq }) fake := NewFakeFS() t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) restoreFS = fake restoreTime = &FakeTime{Current: time.Unix(1700000000, 0)} - restoreStageSequence = 0 stageRoot, err := createRestoreStageDir() if err != nil { t.Fatalf("createRestoreStageDir error: %v", err) } - if !strings.HasPrefix(stageRoot, "/tmp/proxsave/restore-stage-") { + if !strings.Contains(stageRoot, "/tmp/proxsave/restore-stage-") { t.Fatalf("stageRoot=%q; want under /tmp/proxsave/restore-stage-*", stageRoot) } @@ -43,6 +40,32 @@ func TestCreateRestoreStageDir_Creates0700Directory(t *testing.T) { } } +func TestCreateRestoreStageDir_UniqueBetweenCalls(t *testing.T) { + origFS := restoreFS + origTime := restoreTime + t.Cleanup(func() { + restoreFS = origFS + restoreTime = origTime + }) + + fake := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) + restoreFS = fake + restoreTime = &FakeTime{Current: time.Unix(1700000000, 0)} + + first, err := createRestoreStageDir() + if err != nil { + t.Fatalf("first createRestoreStageDir error: %v", err) + } + second, err := createRestoreStageDir() + if err != nil { + t.Fatalf("second createRestoreStageDir error: %v", err) + } + if first == second { + t.Fatalf("stageRoot collision: %q", first) + } +} + func TestCleanupOldRestoreStageDirs_RemovesOnlyOldDirs(t *testing.T) { fake := NewFakeFS() t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) From dedc61c2cb740f05b91d429d7ae8d90a17f6b645 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:57:40 +0100 Subject: [PATCH 19/24] Harden file permissions and create rollback logs Tighten file permissions and ensure rollback logs are created. 
Use OpenFile to create the backup archive with 0600, change various WriteFile calls to create files with 0600 (markers, scripts, location file) instead of more permissive modes, and create empty rollback log files before writing markers/scripts. Also remove redundant shebang lines from generated rollback scripts. Affected files: internal/orchestrator/backup_safety.go, network_apply.go, restore_access_control_ui.go, restore_firewall.go, restore_ha.go. These changes improve security of stored artifacts and rollback handling. --- internal/orchestrator/backup_safety.go | 4 ++-- internal/orchestrator/network_apply.go | 19 ++++++++++++++----- .../orchestrator/restore_access_control_ui.go | 10 ++++++---- internal/orchestrator/restore_firewall.go | 9 ++++++--- internal/orchestrator/restore_ha.go | 9 ++++++--- 5 files changed, 34 insertions(+), 17 deletions(-) diff --git a/internal/orchestrator/backup_safety.go b/internal/orchestrator/backup_safety.go index 5402897..af3c982 100644 --- a/internal/orchestrator/backup_safety.go +++ b/internal/orchestrator/backup_safety.go @@ -90,7 +90,7 @@ func createSafetyBackup(logger *logging.Logger, selectedCategories []Category, d logger.Info("Creating %s of current configuration...", strings.ToLower(desc)) logger.Debug("%s will be saved to: %s", desc, backupArchive) - file, err := safetyFS.Create(backupArchive) + file, err := safetyFS.OpenFile(backupArchive, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600) if err != nil { return nil, fmt.Errorf("create backup archive: %w", err) } @@ -186,7 +186,7 @@ func createSafetyBackup(logger *logging.Logger, selectedCategories []Category, d if spec.WriteLocationFile && locationFileName != "" { locationFile := filepath.Join(baseDir, locationFileName) - if err := safetyFS.WriteFile(locationFile, []byte(backupArchive), 0644); err != nil { + if err := safetyFS.WriteFile(locationFile, []byte(backupArchive), 0o600); err != nil { logger.Warning("Could not write backup location file: %v", err) } else { logger.Info("Backup location saved to: %s", locationFile) diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index dec3303..6a7fa27 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -254,15 +254,20 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath } logging.DebugStep(logger, "arm network rollback", "Handle created: marker=%s script=%s log=%s", handle.markerPath, handle.scriptPath, handle.logPath) + logging.DebugStep(logger, "arm network rollback", "Create rollback log: %s", handle.logPath) + if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { + return nil, fmt.Errorf("create rollback log: %w", err) + } + logging.DebugStep(logger, "arm network rollback", "Write rollback marker: %s", handle.markerPath) - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } logging.DebugStep(logger, "arm network rollback", "Marker written successfully") logging.DebugStep(logger, "arm network rollback", "Write rollback script: %s", handle.scriptPath) script := buildRollbackScript(handle.markerPath, backupPath, handle.logPath, true) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { return nil, 
fmt.Errorf("write rollback script: %w", err) } logging.DebugStep(logger, "arm network rollback", "Script written successfully (%d bytes)", len(script)) @@ -593,14 +598,19 @@ func rollbackNetworkFilesNow(ctx context.Context, logger *logging.Logger, backup scriptPath := filepath.Join(baseDir, fmt.Sprintf("network_rollback_now_%s.sh", timestamp)) logPath = filepath.Join(baseDir, fmt.Sprintf("network_rollback_now_%s.log", timestamp)) + logging.DebugStep(logger, "rollback network files", "Create rollback log: %s", logPath) + if err := restoreFS.WriteFile(logPath, []byte(""), 0o600); err != nil { + return "", fmt.Errorf("create rollback log: %w", err) + } + logging.DebugStep(logger, "rollback network files", "Write rollback marker: %s", markerPath) - if err := restoreFS.WriteFile(markerPath, []byte("pending\n"), 0o640); err != nil { + if err := restoreFS.WriteFile(markerPath, []byte("pending\n"), 0o600); err != nil { return "", fmt.Errorf("write rollback marker: %w", err) } logging.DebugStep(logger, "rollback network files", "Write rollback script: %s", scriptPath) script := buildRollbackScript(markerPath, backupPath, logPath, false) - if err := restoreFS.WriteFile(scriptPath, []byte(script), 0o640); err != nil { + if err := restoreFS.WriteFile(scriptPath, []byte(script), 0o600); err != nil { _ = restoreFS.Remove(markerPath) return "", fmt.Errorf("write rollback script: %w", err) } @@ -623,7 +633,6 @@ func rollbackNetworkFilesNow(ctx context.Context, logger *logging.Logger, backup func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworking bool) string { lines := []string{ - "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), diff --git a/internal/orchestrator/restore_access_control_ui.go b/internal/orchestrator/restore_access_control_ui.go index 2fecd71..475d8a5 100644 --- a/internal/orchestrator/restore_access_control_ui.go +++ b/internal/orchestrator/restore_access_control_ui.go @@ -357,12 +357,16 @@ func armAccessControlRollback(ctx context.Context, logger *logging.Logger, backu timeout: timeout, } - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { + return nil, fmt.Errorf("create rollback log: %w", err) + } + + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildAccessControlRollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -427,7 +431,6 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st } lines := []string{ - "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), @@ -488,4 +491,3 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st ) return strings.Join(lines, "\n") + "\n" } - diff --git a/internal/orchestrator/restore_firewall.go b/internal/orchestrator/restore_firewall.go index 4b91b90..853e5b6 100644 --- a/internal/orchestrator/restore_firewall.go +++ b/internal/orchestrator/restore_firewall.go @@ -434,12 +434,16 @@ func armFirewallRollback(ctx context.Context, logger *logging.Logger, backupPath 
timeout: timeout, } - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { + return nil, fmt.Errorf("create rollback log: %w", err) + } + + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildFirewallRollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -501,7 +505,6 @@ func disarmFirewallRollback(ctx context.Context, logger *logging.Logger, handle func buildFirewallRollbackScript(markerPath, backupPath, logPath string) string { lines := []string{ - "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), diff --git a/internal/orchestrator/restore_ha.go b/internal/orchestrator/restore_ha.go index d69db66..a48227e 100644 --- a/internal/orchestrator/restore_ha.go +++ b/internal/orchestrator/restore_ha.go @@ -364,12 +364,16 @@ func armHARollback(ctx context.Context, logger *logging.Logger, backupPath strin timeout: timeout, } - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { + return nil, fmt.Errorf("create rollback log: %w", err) + } + + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildHARollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -426,7 +430,6 @@ func disarmHARollback(ctx context.Context, logger *logging.Logger, handle *haRol func buildHARollbackScript(markerPath, backupPath, logPath string) string { lines := []string{ - "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), From b2e6f3e862b683d576ae056bd447156386004107 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Tue, 17 Feb 2026 22:53:09 +0100 Subject: [PATCH 20/24] Downgrade unprivileged container failures to SKIP Detect unprivileged/container user-namespace contexts and treat known privilege-sensitive command failures as informational SKIPs instead of warnings. Adds collector_privilege_sensitive.go implementing detection via /proc/self/{uid_map,gid_map} and heuristics for commands (dmidecode, blkid, sensors, smartctl), wires a new DetectUnprivilegedContainer dep, and updates Collector.safeCmdOutput and captureCommandOutput to log SKIP with helpful details/restore hints. Includes unit tests for detection and behavior, and documentation updates (CLI, restore guides, troubleshooting) explaining SKIP labels and impact on automated /etc/fstab remapping. Minor formatting/field alignment changes in CollectorConfig/defaults. 
--- docs/CLI_REFERENCE.md | 16 ++ docs/RESTORE_GUIDE.md | 2 +- docs/RESTORE_TECHNICAL.md | 1 + docs/TROUBLESHOOTING.md | 29 +++ internal/backup/collector.go | 111 +++++++---- internal/backup/collector_deps.go | 9 +- .../backup/collector_privilege_sensitive.go | 173 ++++++++++++++++++ .../collector_privilege_sensitive_test.go | 146 +++++++++++++++ 8 files changed, 448 insertions(+), 39 deletions(-) create mode 100644 internal/backup/collector_privilege_sensitive.go create mode 100644 internal/backup/collector_privilege_sensitive_test.go diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index eae8335..d32a00d 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -496,6 +496,22 @@ If you want to remove those guards manually (optional): - `--log-level` (CLI flag): Controls logging verbosity - `DEBUG_LEVEL` (config): Controls operation detail level (`standard`/`advanced`/`extreme`) +### Log Labels (PHASE/STEP/SKIP) + +Some log lines use a label to make the output easier to scan: + +| Label | Level | Meaning | +|-------|-------|---------| +| `PHASE` | `info` | High-level workflow phase marker | +| `STEP` | `info` | A notable step within a phase | +| `SKIP` | `info` | Optional item intentionally skipped or not applicable | + +**Common `SKIP` examples**: +- A feature is disabled by configuration. +- A non-critical CLI tool is not installed. +- Running in an **unprivileged container/rootless** environment where low-level inventory commands are expected to fail (for example `dmidecode` or `blkid`). In this case, ProxSave still attempts the collection, but logs a `SKIP` (not a `WARNING`) when the failure matches known “missing privileges” patterns. + - For `blkid`, the skip reason also includes a restore hint: automated `/etc/fstab` device remap (UUID/PARTUUID/LABEL) may be limited. + ### Flag Reference | Flag | Short | Description | diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 2b6468a..0fae87d 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -1922,7 +1922,7 @@ If the restore includes filesystem configuration (notably `/etc/fstab`), ProxSav - Compares the current `/etc/fstab` with the backup copy. - Keeps existing critical entries (for example, root and swap) when they already match the running system. - Detects **safe mount candidates** from the backup (for example, additional NFS mounts) and offers to add them. -- If ProxSave inventory data is present in the backup, ProxSave can remap **unstable** `/dev/*` devices from the backup (for example `/dev/sdb1`) to stable `UUID=`/`PARTUUID=`/`LABEL=` references **on the restore host** (only when the stable reference exists on the system). +- If ProxSave inventory data is present in the backup, ProxSave can remap **unstable** `/dev/*` devices from the backup (for example `/dev/sdb1`) to stable `UUID=`/`PARTUUID=`/`LABEL=` references **on the restore host** (only when the stable reference exists on the system). Note: backups taken from an **unprivileged container/rootless** environment may not include usable block-device inventory, so automated remap can be limited/unavailable. - Normalizes restored entries by adding `nofail` (and `_netdev` for network mounts) so offline storage does not block boot/restore. 
**Safety behavior**: diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index d8f3bb8..5cdf984 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -1511,6 +1511,7 @@ When restoring to the real system root (`/`), ProxSave avoids blindly overwritin - If the backup contains ProxSave inventory (`var/lib/proxsave-info/commands/system/{blkid.txt,lsblk_json.json,lsblk.txt}` or PBS datastore inventory), ProxSave can remap unstable device paths from the backup (e.g. `/dev/sdb1`) to stable references (`UUID=`/`PARTUUID=`/`LABEL=`) **when the stable reference exists on the restore host**. - This reduces the risk of mounting the wrong disk after a reinstall where `/dev/sdX` ordering changes. +- Note: backups taken from an **unprivileged container/rootless** environment may not include usable block-device inventory (for example `blkid` output can be empty/skipped). In that case, automated device remap is limited/unavailable and `/etc/fstab` entries may require manual review during restore. **Normalization**: - Entries written by the merge are normalized to include `nofail` (and `_netdev` for network mounts) to prevent offline storage from blocking boot/restore. diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 90a15d4..7f179e3 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -168,6 +168,35 @@ COMPRESSION_TYPE=xz # Valid: xz, zstd, gzip, bzip2, lz4 --- +#### Notice: `SKIP ... Expected in unprivileged containers` (LXC/rootless) + +**Symptoms**: +- Running ProxSave inside an **unprivileged** LXC container (or a rootless container) produces log lines like: + - `SKIP Skipping Hardware DMI information: command \`dmidecode\` failed (...). Expected in unprivileged containers (...)` + - `SKIP Skipping Block device identifiers (blkid): command \`blkid\` failed (...). Expected in unprivileged containers (block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited)` + +**Cause**: In unprivileged containers, access to low-level system interfaces is intentionally restricted (for example `/dev/mem` and most block devices). Commands like `dmidecode` and `blkid` can fail even though the backup itself is working correctly. + +**Behavior**: +- ProxSave still attempts to collect the data. +- When the failure matches a known “missing privileges” pattern for a small allowlist of commands, it is logged as `SKIP` (info) instead of `WARNING`. +- Other failures are **not** downgraded and still appear as warnings/errors. + +**Impact**: +- Hardware inventory output may be missing/empty. +- If `blkid` is skipped, ProxSave restore may have **limited** ability to automatically remap `/etc/fstab` devices (UUID/PARTUUID/LABEL). You may need to review mounts manually during restore. + +**How to verify** (unprivileged user namespace mapping): +```bash +cat /proc/self/uid_map +cat /proc/self/gid_map +# If the second column is non-zero (e.g. "0 100000 65536"), you're in a shifted/unprivileged mapping. +``` + +**Optional**: If you want to hide `SKIP` lines on the console, run with `--log-level warning` (this also hides normal info logs). + +--- + ### 3. 
Cloud Storage Issues #### Error: `rclone not found in PATH` diff --git a/internal/backup/collector.go b/internal/backup/collector.go index 676a9f8..c541eee 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -155,23 +155,23 @@ type CollectorConfig struct { CephConfigPath string // PBS-specific collection options - BackupDatastoreConfigs bool - BackupPBSS3Endpoints bool - BackupPBSNodeConfig bool - BackupPBSAcmeAccounts bool - BackupPBSAcmePlugins bool - BackupPBSMetricServers bool - BackupPBSTrafficControl bool - BackupPBSNotifications bool + BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool + BackupPBSNotifications bool BackupPBSNotificationsPriv bool - BackupUserConfigs bool - BackupRemoteConfigs bool - BackupSyncJobs bool - BackupVerificationJobs bool - BackupTapeConfigs bool - BackupPBSNetworkConfig bool - BackupPruneSchedules bool - BackupPxarFiles bool + BackupUserConfigs bool + BackupRemoteConfigs bool + BackupSyncJobs bool + BackupVerificationJobs bool + BackupTapeConfigs bool + BackupPBSNetworkConfig bool + BackupPruneSchedules bool + BackupPxarFiles bool // System collection options BackupNetworkConfigs bool @@ -333,23 +333,23 @@ func GetDefaultCollectorConfig() *CollectorConfig { CephConfigPath: "/etc/ceph", // PBS-specific (all enabled by default) - BackupDatastoreConfigs: true, - BackupPBSS3Endpoints: true, - BackupPBSNodeConfig: true, - BackupPBSAcmeAccounts: true, - BackupPBSAcmePlugins: true, - BackupPBSMetricServers: true, - BackupPBSTrafficControl: true, - BackupPBSNotifications: true, - BackupPBSNotificationsPriv: true, - BackupUserConfigs: true, - BackupRemoteConfigs: true, - BackupSyncJobs: true, - BackupVerificationJobs: true, - BackupTapeConfigs: true, - BackupPBSNetworkConfig: true, - BackupPruneSchedules: true, - BackupPxarFiles: true, + BackupDatastoreConfigs: true, + BackupPBSS3Endpoints: true, + BackupPBSNodeConfig: true, + BackupPBSAcmeAccounts: true, + BackupPBSAcmePlugins: true, + BackupPBSMetricServers: true, + BackupPBSTrafficControl: true, + BackupPBSNotifications: true, + BackupPBSNotificationsPriv: true, + BackupUserConfigs: true, + BackupRemoteConfigs: true, + BackupSyncJobs: true, + BackupVerificationJobs: true, + BackupTapeConfigs: true, + BackupPBSNetworkConfig: true, + BackupPruneSchedules: true, + BackupPxarFiles: true, // System collection (all enabled by default) BackupNetworkConfigs: true, @@ -928,11 +928,36 @@ func (c *Collector) safeCmdOutput(ctx context.Context, cmd, output, description c.incFilesFailed() return fmt.Errorf("critical command `%s` failed for %s: %w (output: %s)", cmdString, description, err, summarizeCommandOutputText(string(out))) } + + exitCode := -1 + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + exitCode = exitErr.ExitCode() + } + outputText := strings.TrimSpace(string(out)) + if ctxInfo := c.depDetectUnprivilegedContainer(); ctxInfo.Detected { + if reason := privilegeSensitiveFailureReason(cmdParts[0], exitCode, outputText); reason != "" { + details := strings.TrimSpace(ctxInfo.Details) + if details != "" { + details = " (" + details + ")" + } + c.logger.Skip("Skipping %s: command `%s` failed (%v). Expected in unprivileged containers%s (%s). 
Non-critical; backup continues.", + description, + cmdString, + err, + details, + reason, + ) + c.logger.Debug("Skip details for %s: output: %s", description, summarizeCommandOutputText(outputText)) + return nil + } + } + c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Ensure the required CLI is available and has proper permissions. Output: %s", description, cmdString, err, - summarizeCommandOutputText(string(out)), + summarizeCommandOutputText(outputText), ) return nil // Non-critical failure } @@ -1258,6 +1283,24 @@ func (c *Collector) captureCommandOutput(ctx context.Context, cmd, output, descr } } + if ctxInfo := c.depDetectUnprivilegedContainer(); ctxInfo.Detected { + if reason := privilegeSensitiveFailureReason(parts[0], exitCode, outputText); reason != "" { + details := strings.TrimSpace(ctxInfo.Details) + if details != "" { + details = " (" + details + ")" + } + c.logger.Skip("Skipping %s: command `%s` failed (%v). Expected in unprivileged containers%s (%s). Non-critical; backup continues.", + description, + cmdString, + err, + details, + reason, + ) + c.logger.Debug("Skip details for %s: output: %s", description, summarizeCommandOutputText(outputText)) + return nil, nil + } + } + c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Output: %s", description, cmdString, diff --git a/internal/backup/collector_deps.go b/internal/backup/collector_deps.go index cbb9aff..6cb980a 100644 --- a/internal/backup/collector_deps.go +++ b/internal/backup/collector_deps.go @@ -26,10 +26,11 @@ var ( // CollectorDeps allows injecting external dependencies for the Collector. type CollectorDeps struct { - LookPath func(string) (string, error) - RunCommandWithEnv func(context.Context, []string, string, ...string) ([]byte, error) - RunCommand func(context.Context, string, ...string) ([]byte, error) - Stat func(string) (os.FileInfo, error) + LookPath func(string) (string, error) + RunCommandWithEnv func(context.Context, []string, string, ...string) ([]byte, error) + RunCommand func(context.Context, string, ...string) ([]byte, error) + Stat func(string) (os.FileInfo, error) + DetectUnprivilegedContainer func() (bool, string) } func defaultCollectorDeps() CollectorDeps { diff --git a/internal/backup/collector_privilege_sensitive.go b/internal/backup/collector_privilege_sensitive.go new file mode 100644 index 0000000..8138468 --- /dev/null +++ b/internal/backup/collector_privilege_sensitive.go @@ -0,0 +1,173 @@ +package backup + +import ( + "os" + "strconv" + "strings" +) + +const ( + uidMapPath = "/proc/self/uid_map" + gidMapPath = "/proc/self/gid_map" + systemdContainerPath = "/run/systemd/container" +) + +type unprivilegedContainerContext struct { + Detected bool + Details string +} + +func (c *Collector) depDetectUnprivilegedContainer() unprivilegedContainerContext { + if c == nil { + return unprivilegedContainerContext{} + } + if c.deps.DetectUnprivilegedContainer != nil { + ok, details := c.deps.DetectUnprivilegedContainer() + return unprivilegedContainerContext{Detected: ok, Details: strings.TrimSpace(details)} + } + ok, details := detectUnprivilegedContainer() + return unprivilegedContainerContext{Detected: ok, Details: details} +} + +// detectUnprivilegedContainer attempts to determine whether ProxSave is running in an +// "unprivileged container"-like context where low-level hardware/block access is typically +// restricted. +// +// Implementation note: +// - We primarily rely on user-namespace UID/GID maps. 
When UID/GID 0 inside maps to a +// non-zero host ID, we treat it as "unprivileged" (common for LXC unprivileged containers). +// - Container flavor is best-effort via /run/systemd/container (if present). +// +// The detection is intentionally conservative in what it changes: it is only used to +// downgrade *known privilege-sensitive command failures* from WARNING to SKIP. +func detectUnprivilegedContainer() (bool, string) { + uidShifted, uidHost := parseRootIDMapShift(readSmallFile(uidMapPath)) + gidShifted, gidHost := parseRootIDMapShift(readSmallFile(gidMapPath)) + if !uidShifted && !gidShifted { + return false, "" + } + + var parts []string + if uidShifted { + parts = append(parts, "uid_map=0->"+strconv.FormatUint(uidHost, 10)) + } + if gidShifted { + parts = append(parts, "gid_map=0->"+strconv.FormatUint(gidHost, 10)) + } + + if container := strings.TrimSpace(readSmallFile(systemdContainerPath)); container != "" { + parts = append(parts, "container="+container) + } + + return true, strings.Join(parts, " ") +} + +func readSmallFile(path string) string { + data, err := os.ReadFile(path) + if err != nil || len(data) == 0 { + return "" + } + // Avoid leaking NUL-separated content (e.g., /proc/*/environ). + return strings.ReplaceAll(string(data), "\x00", " ") +} + +// parseRootIDMapShift checks whether the mapping for UID/GID 0 is shifted (i.e., maps to a +// non-zero host ID). Returns (true, hostStart) when shifted. +func parseRootIDMapShift(content string) (bool, uint64) { + content = strings.TrimSpace(content) + if content == "" { + return false, 0 + } + for _, line := range strings.Split(content, "\n") { + fields := strings.Fields(strings.TrimSpace(line)) + if len(fields) < 3 { + continue + } + insideStart, err1 := strconv.ParseUint(fields[0], 10, 64) + hostStart, err2 := strconv.ParseUint(fields[1], 10, 64) + length, err3 := strconv.ParseUint(fields[2], 10, 64) + if err1 != nil || err2 != nil || err3 != nil { + continue + } + if length == 0 { + continue + } + // We only care about the range that covers "root" inside the namespace (UID/GID 0). + if insideStart == 0 { + if hostStart == 0 { + return false, 0 + } + return true, hostStart + } + } + return false, 0 +} + +func isPrivilegeSensitiveFailureCandidate(command string) bool { + switch command { + case "dmidecode", "blkid", "sensors", "smartctl": + return true + default: + return false + } +} + +func privilegeSensitiveFailureReason(command string, exitCode int, outputText string) string { + command = strings.TrimSpace(command) + if command == "" { + return "" + } + if !isPrivilegeSensitiveFailureCandidate(command) { + return "" + } + + lower := strings.ToLower(strings.TrimSpace(outputText)) + hasPerm := containsAny(lower, + "permission denied", + "operation not permitted", + "not permitted", + "access denied", + ) + + switch command { + case "dmidecode": + // dmidecode typically fails due to restricted access to DMI tables (/sys/firmware/dmi or /dev/mem). + if hasPerm || strings.Contains(lower, "/dev/mem") || strings.Contains(lower, "/sys/firmware/dmi") { + return "DMI tables not accessible" + } + case "blkid": + // In unprivileged LXC, blkid often exits 2 with empty output when block devices are not accessible. 
+ if exitCode == 2 && lower == "" { + return "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited" + } + if hasPerm { + return "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited" + } + case "sensors": + // "No sensors found!" is common in virtualized/containerized environments. + if strings.Contains(lower, "no sensors found") { + return "no hardware sensors available" + } + if hasPerm { + return "hardware sensors not accessible" + } + case "smartctl": + if hasPerm { + return "SMART devices not accessible" + } + } + + return "" +} + +func containsAny(haystack string, needles ...string) bool { + for _, needle := range needles { + if needle == "" { + continue + } + if strings.Contains(haystack, needle) { + return true + } + } + return false +} diff --git a/internal/backup/collector_privilege_sensitive_test.go b/internal/backup/collector_privilege_sensitive_test.go new file mode 100644 index 0000000..ffb3ae1 --- /dev/null +++ b/internal/backup/collector_privilege_sensitive_test.go @@ -0,0 +1,146 @@ +package backup + +import ( + "bytes" + "context" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestParseRootIDMapShift(t *testing.T) { + t.Run("identity mapping", func(t *testing.T) { + shifted, host := parseRootIDMapShift("0 0 4294967295\n") + if shifted || host != 0 { + t.Fatalf("shifted=%v host=%d; want false,0", shifted, host) + } + }) + + t.Run("shifted mapping", func(t *testing.T) { + shifted, host := parseRootIDMapShift("0 100000 65536\n") + if !shifted || host != 100000 { + t.Fatalf("shifted=%v host=%d; want true,100000", shifted, host) + } + }) + + t.Run("missing root range", func(t *testing.T) { + shifted, host := parseRootIDMapShift("1 100000 65536\n") + if shifted || host != 0 { + t.Fatalf("shifted=%v host=%d; want false,0", shifted, host) + } + }) +} + +func TestPrivilegeSensitiveFailureReason(t *testing.T) { + cases := []struct { + name string + command string + exitCode int + output string + want string + }{ + {"dmidecode perm", "dmidecode", 1, "/dev/mem: Permission denied", "DMI tables not accessible"}, + {"blkid exit2 empty", "blkid", 2, "", "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited"}, + {"blkid perm", "blkid", 2, "Permission denied", "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited"}, + {"sensors none", "sensors", 1, "No sensors found!", "no hardware sensors available"}, + {"smartctl perm", "smartctl", 1, "Permission denied", "SMART devices not accessible"}, + {"other ignored", "false", 1, "Permission denied", ""}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := privilegeSensitiveFailureReason(tc.command, tc.exitCode, tc.output) + if got != tc.want { + t.Fatalf("reason=%q; want %q", got, tc.want) + } + }) + } +} + +func TestSafeCmdOutput_DowngradesPrivilegeSensitiveFailureToSkip(t *testing.T) { + logger := logging.New(types.LogLevelInfo, false) + buf := &bytes.Buffer{} + logger.SetOutput(buf) + + cfg := GetDefaultCollectorConfig() + tmp := t.TempDir() + + deps := CollectorDeps{ + LookPath: func(string) (string, error) { return "/usr/sbin/dmidecode", nil }, + RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { + cmd := exec.Command("sh", "-c", 
"echo '/dev/mem: Permission denied' >&2; exit 1") + out, err := cmd.CombinedOutput() + return out, err + }, + DetectUnprivilegedContainer: func() (bool, string) { + return true, "uid_map=0->100000 container=lxc" + }, + } + c := NewCollectorWithDeps(logger, cfg, tmp, types.ProxmoxUnknown, false, deps) + + outPath := filepath.Join(tmp, "dmidecode.txt") + if err := c.safeCmdOutput(context.Background(), "dmidecode", outPath, "Hardware DMI information", false); err != nil { + t.Fatalf("safeCmdOutput error: %v", err) + } + + logText := buf.String() + if !strings.Contains(logText, "SKIP") { + t.Fatalf("expected SKIP in logs, got: %s", logText) + } + if strings.Contains(logText, "WARNING") { + t.Fatalf("expected no WARNING in logs, got: %s", logText) + } + if _, err := os.Stat(outPath); !os.IsNotExist(err) { + t.Fatalf("expected no output file to be created, stat err=%v", err) + } +} + +func TestCaptureCommandOutput_DowngradesBlkidExit2ToSkipInUnprivilegedContainer(t *testing.T) { + logger := logging.New(types.LogLevelInfo, false) + buf := &bytes.Buffer{} + logger.SetOutput(buf) + + cfg := GetDefaultCollectorConfig() + tmp := t.TempDir() + + deps := CollectorDeps{ + LookPath: func(string) (string, error) { return "/sbin/blkid", nil }, + RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { + cmd := exec.Command("sh", "-c", "exit 2") + out, err := cmd.CombinedOutput() + return out, err + }, + DetectUnprivilegedContainer: func() (bool, string) { + return true, "uid_map=0->100000 container=lxc" + }, + } + c := NewCollectorWithDeps(logger, cfg, tmp, types.ProxmoxUnknown, false, deps) + + outPath := filepath.Join(tmp, "blkid.txt") + data, err := c.captureCommandOutput(context.Background(), "blkid", outPath, "Block device identifiers (blkid)", false) + if err != nil { + t.Fatalf("captureCommandOutput returned error: %v", err) + } + if data != nil { + t.Fatalf("expected nil data on non-critical failure, got %q", string(data)) + } + + logText := buf.String() + if !strings.Contains(logText, "SKIP") { + t.Fatalf("expected SKIP in logs, got: %s", logText) + } + if !strings.Contains(strings.ToLower(logText), "restore hint") { + t.Fatalf("expected restore hint in logs, got: %s", logText) + } + if strings.Contains(logText, "WARNING") { + t.Fatalf("expected no WARNING in logs, got: %s", logText) + } + if _, err := os.Stat(outPath); !os.IsNotExist(err) { + t.Fatalf("expected no output file to be created, stat err=%v", err) + } +} From 7c39b2a47a4132e4415b67a9e7f82e77d8533e33 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Fri, 20 Feb 2026 13:53:14 +0100 Subject: [PATCH 21/24] Add PVESH timeout and PVE storage runtime info Introduce PVESH_TIMEOUT configuration and propagate it through config, templates, docs and the orchestrator; the collector now applies a per-call timeout to pvesh invocations. Extend PVE storage parsing to capture runtime fields (active, enabled, status) from pvesh output with tolerant parsing of bool/int/string forms, include runtime info in logging, and skip storages that appear unavailable to reduce hangs. Also add helpers for formatting runtime metadata. Separately, enhance lock-file handling in checks: parse pid/host/time metadata, perform same-host PID liveness checks (with injectable killFunc) and remove stale locks when appropriate; update related tests to cover PID-not-running behavior and adjust stat-failure test. Update unit tests for PVE storage parsing to cover runtime fields. 
--- docs/BACKUP_ENV_MAPPING.md | 1 + docs/CONFIGURATION.md | 3 + internal/backup/collector.go | 19 ++- internal/backup/collector_pve.go | 100 +++++++++++++-- internal/backup/collector_pve_parse_test.go | 30 ++++- internal/checks/checks.go | 135 +++++++++++++++++--- internal/checks/checks_test.go | 57 ++++++++- internal/config/config.go | 5 + internal/config/templates/backup.env | 1 + internal/orchestrator/orchestrator.go | 1 + 10 files changed, 312 insertions(+), 40 deletions(-) diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index db10996..488b7dc 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -88,6 +88,7 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. +PVESH_TIMEOUT = NEW (Go-only) → Timeout in seconds for `pvesh` calls during PVE collection (0 disables). NOTE: PBS restore behavior is selected interactively during `--restore` and is intentionally not configured via `backup.env`. BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 6603768..60e15c1 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -958,6 +958,9 @@ BACKUP_PVE_SCHEDULES=true # Cron schedules # Replication BACKUP_PVE_REPLICATION=true # VM/CT replication config +# pvesh timeout +PVESH_TIMEOUT=15 # Timeout for pvesh calls during PVE collection (seconds; 0 disables) + # PVE backup files BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only diff --git a/internal/backup/collector.go b/internal/backup/collector.go index c541eee..93edfe9 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -153,6 +153,7 @@ type CollectorConfig struct { PVEBackupIncludePattern string BackupCephConfig bool CephConfigPath string + PveshTimeoutSeconds int // PBS-specific collection options BackupDatastoreConfigs bool @@ -331,6 +332,7 @@ func GetDefaultCollectorConfig() *CollectorConfig { PVEBackupIncludePattern: "", BackupCephConfig: true, CephConfigPath: "/etc/ceph", + PveshTimeoutSeconds: 15, // PBS-specific (all enabled by default) BackupDatastoreConfigs: true, @@ -922,7 +924,13 @@ func (c *Collector) safeCmdOutput(ctx context.Context, cmd, output, description } cmdString := strings.Join(cmdParts, " ") - out, err := c.depRunCommand(ctx, cmdParts[0], cmdParts[1:]...) + runCtx := ctx + cancel := func() {} + if cmdParts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { + runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) + } + defer cancel() + out, err := c.depRunCommand(runCtx, cmdParts[0], cmdParts[1:]...) if err != nil { if critical { c.incFilesFailed() @@ -1251,7 +1259,14 @@ func (c *Collector) captureCommandOutput(ctx context.Context, cmd, output, descr return nil, nil } - out, err := c.depRunCommand(ctx, parts[0], parts[1:]...) + runCtx := ctx + cancel := func() {} + if parts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { + runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) + } + defer cancel() + + out, err := c.depRunCommand(runCtx, parts[0], parts[1:]...) 
if err != nil { cmdString := strings.Join(parts, " ") if critical { diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index f8a6efa..910ec74 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -20,6 +20,13 @@ type pveStorageEntry struct { Path string Type string Content string + + // Runtime status fields from `pvesh get /nodes//storage`. + // These are optional and may be nil/empty depending on the data source + // (e.g. storage.cfg parsing has no runtime status). + Active *bool + Enabled *bool + Status string } type pveRuntimeInfo struct { @@ -653,17 +660,11 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv c.logger.Debug("Skipping cluster runtime commands: BACKUP_CLUSTER_CONFIG=false (clustered=%v)", clustered) } - // Storage status hostname, _ := os.Hostname() nodeName := shortHostname(hostname) if nodeName == "" { nodeName = hostname } - c.safeCmdOutput(ctx, - fmt.Sprintf("pvesh get /nodes/%s/storage --output-format=json", nodeName), - filepath.Join(commandsDir, "storage_status.json"), - "Storage status", - false) // Disk list c.safeCmdOutput(ctx, @@ -718,15 +719,46 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { var raw []struct { - Storage string `json:"storage"` - Name string `json:"name"` - Path string `json:"path"` - Type string `json:"type"` - Content string `json:"content"` + Storage string `json:"storage"` + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` + Content string `json:"content"` + Active json.RawMessage `json:"active"` + Enabled json.RawMessage `json:"enabled"` + Status string `json:"status"` } if err := json.Unmarshal(data, &raw); err != nil { return nil, err } + + parseBool := func(raw json.RawMessage) *bool { + if len(raw) == 0 { + return nil + } + var b bool + if err := json.Unmarshal(raw, &b); err == nil { + return &b + } + var i int + if err := json.Unmarshal(raw, &i); err == nil { + v := i != 0 + return &v + } + var s string + if err := json.Unmarshal(raw, &s); err == nil { + switch strings.ToLower(strings.TrimSpace(s)) { + case "1", "true", "yes", "on": + v := true + return &v + case "0", "false", "no", "off": + v := false + return &v + } + } + return nil + } + seen := make(map[string]struct{}) entries := make([]pveStorageEntry, 0, len(raw)) for _, item := range raw { @@ -746,6 +778,9 @@ func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { Path: strings.TrimSpace(item.Path), Type: strings.TrimSpace(item.Type), Content: strings.TrimSpace(item.Content), + Active: parseBool(item.Active), + Enabled: parseBool(item.Enabled), + Status: strings.TrimSpace(item.Status), }) } return entries, nil @@ -992,6 +1027,41 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv summary.WriteString("\n# Format: NAME|PATH|TYPE|CONTENT\n\n") processed := 0 + formatRuntime := func(storage pveStorageEntry) string { + parts := make([]string, 0, 3) + if status := strings.TrimSpace(storage.Status); status != "" { + parts = append(parts, "status="+status) + } + if storage.Active != nil { + parts = append(parts, fmt.Sprintf("active=%v", *storage.Active)) + } + if storage.Enabled != nil { + parts = append(parts, fmt.Sprintf("enabled=%v", *storage.Enabled)) + } + if len(parts) == 0 { + return "" + } + return " (" + strings.Join(parts, " ") + ")" + } + unavailableReason := func(storage pveStorageEntry) string { + 
if storage.Enabled != nil && !*storage.Enabled { + return "enabled=false" + } + if storage.Active != nil && !*storage.Active { + return "active=false" + } + if status := strings.ToLower(strings.TrimSpace(storage.Status)); status != "" { + switch status { + case "available", "active", "ok": + // Known-good states + case "unknown", "inactive", "disabled", "unavailable", "error": + return "status=" + status + default: + // Unknown status: do not skip based on this field alone. + } + } + return "" + } for _, storage := range storages { if storage.Path == "" { continue @@ -999,6 +1069,14 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv if err := ctx.Err(); err != nil { return err } + if reason := unavailableReason(storage); reason != "" { + c.logger.Warning("Skipping datastore %s (path=%s)%s: not available (%s)", storage.Name, storage.Path, formatRuntime(storage), reason) + continue + } + + // NOTE: os.Stat() on an unreachable mount can hang inside the kernel. + // The availability filter above reduces the likelihood by skipping inactive/unavailable storages reported by PVE. + c.logger.Info("Processing datastore %s (path=%s)%s", storage.Name, storage.Path, formatRuntime(storage)) if stat, err := os.Stat(storage.Path); err != nil || !stat.IsDir() { c.logger.Debug("Skipping datastore %s (path not accessible: %s)", storage.Name, storage.Path) continue diff --git a/internal/backup/collector_pve_parse_test.go b/internal/backup/collector_pve_parse_test.go index 8e3b96c..39e18b6 100644 --- a/internal/backup/collector_pve_parse_test.go +++ b/internal/backup/collector_pve_parse_test.go @@ -8,6 +8,7 @@ import ( // TestParseNodeStorageList tests parsing PVE storage entries from JSON func TestParseNodeStorageList(t *testing.T) { + boolPtr := func(v bool) *bool { return &v } tests := []struct { name string input string @@ -99,16 +100,28 @@ func TestParseNodeStorageList(t *testing.T) { { name: "mixed valid and empty entries", input: `[ - {"storage": "storage1", "path": "/path1", "type": "dir", "content": "iso"}, - {"storage": "", "path": "/path2", "type": "dir", "content": "backup"}, - {"storage": "storage2", "path": "/path3", "type": "nfs", "content": "images"} - ]`, + {"storage": "storage1", "path": "/path1", "type": "dir", "content": "iso"}, + {"storage": "", "path": "/path2", "type": "dir", "content": "backup"}, + {"storage": "storage2", "path": "/path3", "type": "nfs", "content": "images"} + ]`, expectError: false, expected: []pveStorageEntry{ {Name: "storage1", Path: "/path1", Type: "dir", Content: "iso"}, {Name: "storage2", Path: "/path3", Type: "nfs", Content: "images"}, }, }, + { + name: "runtime status fields", + input: `[ + {"storage": "nfs-backup", "path": "/mnt/backup", "type": "nfs", "content": "backup", "active": 0, "enabled": 1, "status": "unknown"}, + {"storage": "local", "path": "/var/lib/vz", "type": "dir", "content": "iso", "active": true, "enabled": "true", "status": "available"} + ]`, + expectError: false, + expected: []pveStorageEntry{ + {Name: "nfs-backup", Path: "/mnt/backup", Type: "nfs", Content: "backup", Active: boolPtr(false), Enabled: boolPtr(true), Status: "unknown"}, + {Name: "local", Path: "/var/lib/vz", Type: "dir", Content: "iso", Active: boolPtr(true), Enabled: boolPtr(true), Status: "available"}, + }, + }, } for _, tt := range tests { @@ -145,6 +158,15 @@ func TestParseNodeStorageList(t *testing.T) { if entry.Content != tt.expected[i].Content { t.Errorf("entry[%d].Content = %q, want %q", i, entry.Content, tt.expected[i].Content) } + if 
got, want := entry.Status, tt.expected[i].Status; got != want { + t.Errorf("entry[%d].Status = %q, want %q", i, got, want) + } + if got, want := entry.Active, tt.expected[i].Active; (got == nil) != (want == nil) || (got != nil && want != nil && *got != *want) { + t.Errorf("entry[%d].Active = %v, want %v", i, got, want) + } + if got, want := entry.Enabled, tt.expected[i].Enabled; (got == nil) != (want == nil) || (got != nil && want != nil && *got != *want) { + t.Errorf("entry[%d].Enabled = %v, want %v", i, got, want) + } } }) } diff --git a/internal/checks/checks.go b/internal/checks/checks.go index 05df098..aee6920 100644 --- a/internal/checks/checks.go +++ b/internal/checks/checks.go @@ -7,6 +7,8 @@ import ( "math" "os" "path/filepath" + "strconv" + "strings" "syscall" "time" @@ -26,6 +28,7 @@ var ( osWriteFile = os.WriteFile osSymlink = os.Symlink syncFile = func(f *os.File) error { return f.Sync() } + killFunc = func(pid int, sig syscall.Signal) error { return syscall.Kill(pid, sig) } // tempRootPath is the runtime path used by CheckTempDirectory. // It is a variable to allow tests to use a safe, isolated temporary directory. @@ -218,6 +221,48 @@ func (c *Checker) CheckDiskSpace() CheckResult { return result } +type lockFileMetadata struct { + PID int + Host string + Timestamp string +} + +func parseLockFileMetadata(content []byte) lockFileMetadata { + meta := lockFileMetadata{} + for _, line := range strings.Split(string(content), "\n") { + line = strings.TrimSpace(line) + switch { + case strings.HasPrefix(line, "pid="): + if pid, err := strconv.Atoi(strings.TrimPrefix(line, "pid=")); err == nil && pid > 0 { + meta.PID = pid + } + case strings.HasPrefix(line, "host="): + meta.Host = strings.TrimSpace(strings.TrimPrefix(line, "host=")) + case strings.HasPrefix(line, "time="): + meta.Timestamp = strings.TrimSpace(strings.TrimPrefix(line, "time=")) + } + } + return meta +} + +func sameHost(a, b string) bool { + a = strings.ToLower(strings.TrimSpace(a)) + b = strings.ToLower(strings.TrimSpace(b)) + if a == "" || b == "" { + return false + } + if a == b { + return true + } + short := func(s string) string { + if idx := strings.IndexByte(s, '.'); idx > 0 { + return s[:idx] + } + return s + } + return short(a) == short(b) +} + // CheckLockFile checks for stale lock files and creates a new lock func (c *Checker) CheckLockFile() CheckResult { result := CheckResult{ @@ -231,29 +276,85 @@ func (c *Checker) CheckLockFile() CheckResult { } c.logger.Debug("Lock file path: %s", lockPath) + info, statErr := osStat(lockPath) + if statErr != nil && !os.IsNotExist(statErr) { + result.Error = fmt.Errorf("failed to stat lock file: %w", statErr) + result.Message = result.Error.Error() + return result + } + // Check if lock file exists - if _, err := osStat(lockPath); err == nil { - // Lock file exists, check its age - info, err := osStat(lockPath) - if err != nil { - result.Error = fmt.Errorf("failed to stat lock file: %w", err) - result.Message = result.Error.Error() - return result + if statErr == nil { + age := time.Since(info.ModTime()) + + formatInProgress := func(age time.Duration, meta lockFileMetadata) string { + parts := []string{fmt.Sprintf("lock age: %v", age)} + if meta.PID > 0 { + parts = append(parts, fmt.Sprintf("pid=%d", meta.PID)) + } + if meta.Host != "" { + parts = append(parts, fmt.Sprintf("host=%s", meta.Host)) + } + if meta.Timestamp != "" { + parts = append(parts, fmt.Sprintf("time=%s", meta.Timestamp)) + } + return "Another backup is in progress (" + strings.Join(parts, ", ") + ")" } 
- age := time.Since(info.ModTime()) - if age > c.config.MaxLockAge { - // Stale lock file, remove it - c.logger.Warning("Removing stale lock file (age: %v)", age) - if err := osRemove(lockPath); err != nil { - result.Error = fmt.Errorf("failed to remove stale lock: %w", err) - result.Message = result.Error.Error() + var meta lockFileMetadata + if content, rerr := os.ReadFile(lockPath); rerr == nil { + meta = parseLockFileMetadata(content) + } else { + c.logger.Debug("Failed to read lock file %s: %v", lockPath, rerr) + } + + hostname, _ := os.Hostname() + if meta.PID > 0 && sameHost(meta.Host, hostname) { + // Only perform PID liveness checks when the lock host matches the current host. + // This avoids false positives/negatives when the lock file resides on shared storage. + killErr := killFunc(meta.PID, 0) + if killErr == nil || errors.Is(killErr, syscall.EPERM) { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) return result } + if errors.Is(killErr, syscall.ESRCH) { + c.logger.Warning("Removing stale lock file (pid %d not running, age: %v)", meta.PID, age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() + return result + } + } else { + // Unexpected error: fall back to age-based detection. + c.logger.Debug("Lock file liveness check failed (pid=%d): %v", meta.PID, killErr) + if age > c.config.MaxLockAge { + c.logger.Warning("Removing stale lock file (age: %v)", age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() + return result + } + } else { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) + return result + } + } } else { - result.Message = fmt.Sprintf("Another backup is in progress (lock age: %v)", age) - c.logger.Error("%s", result.Message) - return result + // No usable PID/host metadata; fall back to age-based stale detection. + if age > c.config.MaxLockAge { + c.logger.Warning("Removing stale lock file (age: %v)", age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() + return result + } + } else { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) + return result + } } } diff --git a/internal/checks/checks_test.go b/internal/checks/checks_test.go index 5f60d0d..bc1e4e1 100644 --- a/internal/checks/checks_test.go +++ b/internal/checks/checks_test.go @@ -149,6 +149,55 @@ func TestCheckLockFileStaleLock(t *testing.T) { checker.ReleaseLock() } +func TestCheckLockFile_RemovesLockWhenProcessIsGone(t *testing.T) { + logger := logging.New(types.LogLevelInfo, false) + logger.SetOutput(io.Discard) + + tmpDir := t.TempDir() + lockPath := filepath.Join(tmpDir, ".backup.lock") + host, _ := os.Hostname() + + // Create a "fresh" lock file that references a non-existent PID. 
+ content := fmt.Sprintf("pid=99999\nhost=%s\ntime=%s\n", host, time.Now().Format(time.RFC3339)) + if err := os.WriteFile(lockPath, []byte(content), 0644); err != nil { + t.Fatalf("Failed to create test lock file: %v", err) + } + now := time.Now() + if err := os.Chtimes(lockPath, now, now); err != nil { + t.Fatalf("Failed to set lock file time: %v", err) + } + + oldKill := killFunc + killFunc = func(pid int, sig syscall.Signal) error { + return syscall.ESRCH + } + t.Cleanup(func() { killFunc = oldKill }) + + config := &CheckerConfig{ + BackupPath: tmpDir, + LogPath: tmpDir, + LockDirPath: tmpDir, + LockFilePath: lockPath, + MaxLockAge: 1 * time.Hour, + DryRun: false, + } + checker := NewChecker(logger, config) + + result := checker.CheckLockFile() + if !result.Passed { + t.Fatalf("CheckLockFile should succeed after removing stale lock: %s", result.Message) + } + t.Cleanup(func() { _ = checker.ReleaseLock() }) + + data, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock file: %v", err) + } + if !strings.Contains(string(data), fmt.Sprintf("pid=%d\n", os.Getpid())) { + t.Fatalf("expected new lock file to contain current pid, got: %q", string(data)) + } +} + func TestCheckLockFile_WritesExpectedContent(t *testing.T) { logger := logging.New(types.LogLevelInfo, false) logger.SetOutput(io.Discard) @@ -1000,7 +1049,7 @@ func TestRunAllChecks_FailsOnLockFile(t *testing.T) { } } -func TestCheckLockFile_StatFailsAfterExistenceCheck(t *testing.T) { +func TestCheckLockFile_StatFails(t *testing.T) { logger := logging.New(types.LogLevelInfo, false) logger.SetOutput(io.Discard) @@ -1016,13 +1065,9 @@ func TestCheckLockFile_StatFailsAfterExistenceCheck(t *testing.T) { origStat := osStat t.Cleanup(func() { osStat = origStat }) - calls := 0 osStat = func(name string) (os.FileInfo, error) { if name == lockPath { - calls++ - if calls == 2 { - return nil, &os.PathError{Op: "stat", Path: name, Err: syscall.EIO} - } + return nil, &os.PathError{Op: "stat", Path: name, Err: syscall.EIO} } return origStat(name) } diff --git a/internal/config/config.go b/internal/config/config.go index 50094b1..a3217b9 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -197,6 +197,7 @@ type Config struct { PVEBackupIncludePattern string BackupCephConfig bool CephConfigPath string + PveshTimeoutSeconds int // Timeout for pvesh commands (seconds). 0 disables the timeout. // PBS-specific collection options BackupDatastoreConfigs bool @@ -654,6 +655,10 @@ func (c *Config) parsePVESettings() error { c.BackupPVESchedules = c.getBool("BACKUP_PVE_SCHEDULES", true) c.BackupPVEReplication = c.getBool("BACKUP_PVE_REPLICATION", true) c.BackupPVEBackupFiles = c.getBool("BACKUP_PVE_BACKUP_FILES", true) + c.PveshTimeoutSeconds = c.getInt("PVESH_TIMEOUT", 15) + if c.PveshTimeoutSeconds < 0 { + c.PveshTimeoutSeconds = 15 + } c.BackupSmallPVEBackups = c.getBool("BACKUP_SMALL_PVE_BACKUPS", false) if rawSize := strings.TrimSpace(c.getString("MAX_PVE_BACKUP_SIZE", "")); rawSize != "" { sizeBytes, err := parseSizeToBytes(rawSize) diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 01244a2..c9f5b63 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -280,6 +280,7 @@ BACKUP_PVE_ACL=true # Access control (users/roles/groups/ACL; rea BACKUP_PVE_JOBS=true BACKUP_PVE_SCHEDULES=true BACKUP_PVE_REPLICATION=true +PVESH_TIMEOUT=15 # seconds (0 disables). Timeout for pvesh calls during PVE collection. 
BACKUP_PVE_BACKUP_FILES=true BACKUP_SMALL_PVE_BACKUPS=false MAX_PVE_BACKUP_SIZE=100M diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 60510de..c953c23 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1510,6 +1510,7 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { cc.PVEBackupIncludePattern = cfg.PVEBackupIncludePattern cc.BackupCephConfig = cfg.BackupCephConfig cc.CephConfigPath = cfg.CephConfigPath + cc.PveshTimeoutSeconds = cfg.PveshTimeoutSeconds cc.BackupDatastoreConfigs = cfg.BackupDatastoreConfigs cc.BackupPBSS3Endpoints = cfg.BackupPBSS3Endpoints From e6f02e49429ffcb207ad32e03a5f0e90eaede5c8 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Fri, 20 Feb 2026 15:51:03 +0100 Subject: [PATCH 22/24] Add FS_IO_TIMEOUT and safefs bounded probes Introduce FS_IO_TIMEOUT (config/ENV) and integrate a new internal/safefs helper to perform filesystem probes (stat, readdir, statfs) with a configurable timeout to avoid hangs on unreachable network mounts. Propagate the FS IO timeout through CollectorConfig and use it in datastore/storage sampling, PXAR metadata collection, directory/file sampling and report generation; timeouts are handled gracefully with warnings and skips. Also: fix pvesh command context cancellation handling, tighten RunCommand/LookPath deps signatures, update tests to accept context/timeout parameters, and update documentation (config/CLI/restore/troubleshooting) to describe the new option and adjust service stop ordering and other doc cleanups. --- cmd/proxsave/main.go | 1 + docs/BACKUP_ENV_MAPPING.md | 3 +- docs/CLI_REFERENCE.md | 16 - docs/CLUSTER_RECOVERY.md | 4 +- docs/CONFIGURATION.md | 15 +- docs/RESTORE_DIAGRAMS.md | 29 +- docs/RESTORE_GUIDE.md | 22 +- docs/RESTORE_TECHNICAL.md | 8 +- docs/TROUBLESHOOTING.md | 32 +- internal/backup/collector.go | 66 +-- internal/backup/collector_deps.go | 9 +- .../collector_pbs_commands_coverage_test.go | 29 +- internal/backup/collector_pbs_datastore.go | 80 ++- internal/backup/collector_pbs_extra_test.go | 9 +- internal/backup/collector_pbs_test.go | 12 +- internal/backup/collector_pve.go | 451 ++++++++++---- .../backup/collector_pve_additional_test.go | 24 +- internal/backup/collector_pve_parse_test.go | 53 +- .../backup/collector_pxar_datastore_test.go | 2 +- .../backup/collector_pxar_reports_test.go | 5 +- internal/backup/optimizations.go | 549 +----------------- internal/backup/optimizations_helpers_test.go | 241 +------- internal/checks/checks.go | 46 +- internal/checks/checks_test.go | 50 +- internal/config/config.go | 7 +- internal/config/templates/backup.env | 3 +- .../orchestrator/additional_helpers_test.go | 16 - internal/orchestrator/backup_safety.go | 4 +- internal/orchestrator/compatibility.go | 10 +- internal/orchestrator/compatibility_test.go | 20 - internal/orchestrator/decrypt_test.go | 7 +- internal/orchestrator/deps_test.go | 6 - internal/orchestrator/network_apply.go | 19 +- internal/orchestrator/orchestrator.go | 12 +- internal/orchestrator/pbs_staged_apply.go | 282 ++++----- internal/orchestrator/restore.go | 46 +- .../orchestrator/restore_access_control_ui.go | 10 +- internal/orchestrator/restore_errors_test.go | 71 +-- internal/orchestrator/restore_firewall.go | 15 +- .../orchestrator/restore_firewall_test.go | 29 - internal/orchestrator/restore_ha.go | 9 +- .../orchestrator/restore_notifications.go | 27 +- 
.../orchestrator/restore_workflow_test.go | 108 ---- internal/orchestrator/restore_workflow_ui.go | 120 +--- .../restore_workflow_warnings_test.go | 92 --- internal/orchestrator/selective.go | 157 ++--- .../orchestrator/selective_additional_test.go | 5 +- internal/orchestrator/selective_pure_test.go | 5 +- internal/orchestrator/staging.go | 75 +-- internal/pbs/namespaces.go | 32 +- internal/pbs/namespaces_test.go | 17 +- internal/safefs/safefs.go | 155 +++++ internal/safefs/safefs_test.go | 74 +++ 53 files changed, 1128 insertions(+), 2061 deletions(-) create mode 100644 internal/safefs/safefs.go create mode 100644 internal/safefs/safefs_test.go diff --git a/cmd/proxsave/main.go b/cmd/proxsave/main.go index 7241cd8..1d51c69 100644 --- a/cmd/proxsave/main.go +++ b/cmd/proxsave/main.go @@ -1057,6 +1057,7 @@ func run() int { checkerConfig.MinDiskPrimaryGB = cfg.MinDiskPrimaryGB checkerConfig.MinDiskSecondaryGB = cfg.MinDiskSecondaryGB checkerConfig.MinDiskCloudGB = cfg.MinDiskCloudGB + checkerConfig.FsIoTimeout = time.Duration(cfg.FsIoTimeoutSeconds) * time.Second checkerConfig.DryRun = dryRun checkerDone := logging.DebugStart(logger, "pre-backup check config", "dry_run=%v", dryRun) if err := checkerConfig.Validate(); err != nil { diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index 488b7dc..7c97ccd 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -88,7 +88,8 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. -PVESH_TIMEOUT = NEW (Go-only) → Timeout in seconds for `pvesh` calls during PVE collection (0 disables). +PVESH_TIMEOUT = NEW (Go-only) → Timeout (seconds) for each `pvesh` command execution (0=disabled). +FS_IO_TIMEOUT = NEW (Go-only) → Timeout (seconds) for filesystem probes (stat/readdir/statfs) on storages (0=disabled). Helps avoid hangs on unreachable network mounts. NOTE: PBS restore behavior is selected interactively during `--restore` and is intentionally not configured via `backup.env`. BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index d32a00d..eae8335 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -496,22 +496,6 @@ If you want to remove those guards manually (optional): - `--log-level` (CLI flag): Controls logging verbosity - `DEBUG_LEVEL` (config): Controls operation detail level (`standard`/`advanced`/`extreme`) -### Log Labels (PHASE/STEP/SKIP) - -Some log lines use a label to make the output easier to scan: - -| Label | Level | Meaning | -|-------|-------|---------| -| `PHASE` | `info` | High-level workflow phase marker | -| `STEP` | `info` | A notable step within a phase | -| `SKIP` | `info` | Optional item intentionally skipped or not applicable | - -**Common `SKIP` examples**: -- A feature is disabled by configuration. -- A non-critical CLI tool is not installed. -- Running in an **unprivileged container/rootless** environment where low-level inventory commands are expected to fail (for example `dmidecode` or `blkid`). In this case, ProxSave still attempts the collection, but logs a `SKIP` (not a `WARNING`) when the failure matches known “missing privileges” patterns. 
- - For `blkid`, the skip reason also includes a restore hint: automated `/etc/fstab` device remap (UUID/PARTUUID/LABEL) may be limited. - ### Flag Reference | Flag | Short | Description | diff --git a/docs/CLUSTER_RECOVERY.md b/docs/CLUSTER_RECOVERY.md index 9178d4c..376fdfc 100644 --- a/docs/CLUSTER_RECOVERY.md +++ b/docs/CLUSTER_RECOVERY.md @@ -1575,7 +1575,7 @@ curl -k https://localhost:8006 ```bash # 1. Stop everything -systemctl stop pvestatd pveproxy pvedaemon pve-cluster +systemctl stop pve-cluster pvedaemon pveproxy pvestatd killall pmxcfs 2>/dev/null # 2. Force unmount /etc/pve @@ -1598,7 +1598,7 @@ pvecm status ```bash # 1. Stop and disable services -systemctl stop pvestatd pveproxy pvedaemon pve-cluster +systemctl stop pve-cluster pvedaemon pveproxy pvestatd systemctl disable pve-cluster # 2. Force unmount diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 60e15c1..8db9ab0 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -958,14 +958,13 @@ BACKUP_PVE_SCHEDULES=true # Cron schedules # Replication BACKUP_PVE_REPLICATION=true # VM/CT replication config -# pvesh timeout -PVESH_TIMEOUT=15 # Timeout for pvesh calls during PVE collection (seconds; 0 disables) - -# PVE backup files -BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump -BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only -MAX_PVE_BACKUP_SIZE=100M # Max size for "small" backups -PVE_BACKUP_INCLUDE_PATTERN= # Glob patterns to include + # PVE backup files + BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump + PVESH_TIMEOUT=15 # Timeout (seconds) for each `pvesh` call (0=disabled) + FS_IO_TIMEOUT=30 # Timeout (seconds) for filesystem probes on storages (stat/readdir/statfs). Helps avoid hangs on unreachable network mounts (0=disabled) + BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only + MAX_PVE_BACKUP_SIZE=100M # Max size for "small" backups + PVE_BACKUP_INCLUDE_PATTERN= # Glob patterns to include # Ceph configuration BACKUP_CEPH_CONFIG=false # Ceph cluster config diff --git a/docs/RESTORE_DIAGRAMS.md b/docs/RESTORE_DIAGRAMS.md index 0f34a35..3dca90a 100644 --- a/docs/RESTORE_DIAGRAMS.md +++ b/docs/RESTORE_DIAGRAMS.md @@ -156,17 +156,18 @@ sequenceDiagram Restore->>Restore: Detect needsClusterRestore = true Note over Restore,Services: Service Stop Phase - Restore->>Services: systemctl stop pvestatd - Services-->>Restore: Stopped - Restore->>Services: systemctl stop pveproxy - Services-->>Restore: Stopped - Restore->>Services: systemctl stop pvedaemon - Services-->>Restore: Stopped Restore->>Services: systemctl stop pve-cluster Services->>FS: Unmount /etc/pve (FUSE) Services->>DB: Close file handles Services-->>Restore: Stopped + Restore->>Services: systemctl stop pvedaemon + Services-->>Restore: Stopped + Restore->>Services: systemctl stop pveproxy + Services-->>Restore: Stopped + Restore->>Services: systemctl stop pvestatd + Services-->>Restore: Stopped + Note over Restore,FS: Unmount Phase Restore->>FS: umount /etc/pve FS-->>Restore: Unmounted (or already unmounted) @@ -280,15 +281,15 @@ stateDiagram-v2 Running --> Stopping: User initiates restore state Stopping { - [*] --> StopStatd - StopStatd: systemctl stop pvestatd - StopStatd --> StopProxy - StopProxy: systemctl stop pveproxy - StopProxy --> StopDaemon - StopDaemon: systemctl stop pvedaemon - StopDaemon --> StopCluster + [*] --> StopCluster StopCluster: systemctl stop pve-cluster - StopCluster --> UnmountPVE + StopCluster --> StopDaemon + StopDaemon: systemctl stop 
pvedaemon + StopDaemon --> StopProxy + StopProxy: systemctl stop pveproxy + StopProxy --> StopStatd + StopStatd: systemctl stop pvestatd + StopStatd --> UnmountPVE UnmountPVE: umount /etc/pve UnmountPVE --> [*] } diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 0fae87d..bac43c2 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -86,7 +86,7 @@ Restore operations are organized into **20–22 categories** (PBS = 20, PVE = 22 Each category is handled in one of three ways: - **Normal**: extracted directly to `/` (system paths) after safety backup -- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` (permissions `0700`) and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`). On clean restores, the staging directory is removed automatically; set `PROXSAVE_PRESERVE_RESTORE_STAGING=1` to keep it. +- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) - **Export-only**: extracted to an export directory for manual review (never written to system paths) ### PVE-Specific Categories (11 categories) @@ -916,10 +916,10 @@ Before Restore: └─────────────┘ Stop Phase: - systemctl stop pvestatd - systemctl stop pveproxy - systemctl stop pvedaemon systemctl stop pve-cluster ← /etc/pve unmounted + systemctl stop pvedaemon + systemctl stop pveproxy + systemctl stop pvestatd umount /etc/pve (if needed) Restore Phase: @@ -1922,7 +1922,7 @@ If the restore includes filesystem configuration (notably `/etc/fstab`), ProxSav - Compares the current `/etc/fstab` with the backup copy. - Keeps existing critical entries (for example, root and swap) when they already match the running system. - Detects **safe mount candidates** from the backup (for example, additional NFS mounts) and offers to add them. -- If ProxSave inventory data is present in the backup, ProxSave can remap **unstable** `/dev/*` devices from the backup (for example `/dev/sdb1`) to stable `UUID=`/`PARTUUID=`/`LABEL=` references **on the restore host** (only when the stable reference exists on the system). Note: backups taken from an **unprivileged container/rootless** environment may not include usable block-device inventory, so automated remap can be limited/unavailable. +- If ProxSave inventory data is present in the backup, ProxSave can remap **unstable** `/dev/*` devices from the backup (for example `/dev/sdb1`) to stable `UUID=`/`PARTUUID=`/`LABEL=` references **on the restore host** (only when the stable reference exists on the system). - Normalizes restored entries by adding `nofail` (and `_netdev` for network mounts) so offline storage does not block boot/restore. **Safety behavior**: @@ -2486,12 +2486,12 @@ Use Ctrl+C carefully - wait for current file to finish. 
**Q: How do I rollback a failed restore?** A: Use the safety backup: - ```bash - # Stop services (if cluster restore) - systemctl stop pvestatd pveproxy pvedaemon pve-cluster +```bash +# Stop services (if cluster restore) +systemctl stop pve-cluster pvedaemon pveproxy pvestatd - # Extract safety backup - tar -xzf /tmp/proxsave/restore_backup_*.tar.gz -C / +# Extract safety backup +tar -xzf /tmp/proxsave/restore_backup_*.tar.gz -C / # Restart services systemctl restart pve-cluster pvedaemon pveproxy pvestatd @@ -2699,7 +2699,7 @@ tar -xzf /path/to/decrypted.tar.gz ./specific/file/path A: Yes: - **Extraction**: ProxSave preserves UID/GID, mode bits and timestamps (mtime/atime) for extracted entries. -- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` (permissions `0700`) and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. On clean restores, the staging directory is removed automatically (override: `PROXSAVE_PRESERVE_RESTORE_STAGING=1`). +- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. - **ctime**: Cannot be set (kernel-managed). --- diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index 5cdf984..2bd5c02 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -871,7 +871,7 @@ func extractSelectiveArchive( #### Phase 10: Staged Apply (PVE/PBS) -After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*` (permissions `0700`). On clean restores, ProxSave removes the staging directory automatically (override: `PROXSAVE_PRESERVE_RESTORE_STAGING=1`). +After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*`. **PBS staged apply**: - Selected interactively during restore on PBS hosts: **Merge (existing PBS)** vs **Clean 1:1 (fresh PBS install)**. @@ -879,7 +879,6 @@ After extraction, **staged categories** are applied from the staging directory u - **Merge**: create/update only (no deletions of existing objects not in the backup). - **Clean 1:1**: attempts 1:1 reconciliation (may remove objects not present in the backup). - If API apply is unavailable or fails, ProxSave may fall back to applying staged `*.cfg` files back to `/etc/proxmox-backup` (**Clean 1:1 only**). -- PBS services are kept stopped while file-based staged configs are written; API-backed categories are applied in a final API phase with services started temporarily. 
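The atomic replace described above for staged categories follows the usual temp-file + fsync + rename + directory-fsync sequence. A minimal sketch under the assumption of a same-filesystem rename (atomicWrite is an illustrative name, not the function ProxSave uses):

```go
// Illustrative sketch only (not part of this patch): an atomic, durable file
// replace of the kind described for staged apply.
package main

import (
	"os"
	"path/filepath"
)

func atomicWrite(dst string, data []byte, perm os.FileMode) error {
	dir := filepath.Dir(dst)
	tmp, err := os.CreateTemp(dir, ".staged-*")
	if err != nil {
		return err
	}
	defer os.Remove(tmp.Name()) // no-op after a successful rename

	if _, err := tmp.Write(data); err != nil {
		tmp.Close()
		return err
	}
	if err := tmp.Chmod(perm); err != nil { // explicit mode, not left to umask
		tmp.Close()
		return err
	}
	if err := tmp.Sync(); err != nil { // flush file contents to disk
		tmp.Close()
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	if err := os.Rename(tmp.Name(), dst); err != nil { // atomic on the same filesystem
		return err
	}
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	return d.Sync() // persist the directory entry itself
}

func main() {
	_ = atomicWrite("/tmp/example.cfg", []byte("key: value\n"), 0o640)
}
```

The explicit Chmod matters because os.CreateTemp creates files with mode 0600 regardless of umask, so the intended mode has to be applied before the rename makes the file visible at its final path.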
**Current PBS API coverage**: - `pbs_host`: node + traffic control @@ -891,8 +890,8 @@ After extraction, **staged categories** are applied from the staging directory u Other PBS categories remain file-based (e.g. access control, tape, proxy/ACME/metricserver). **Key code paths**: -- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`, `maybeApplyPBSConfigsViaAPIFromStage`) -- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pve_notifications`) +- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`) +- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pbs_notifications`) - `internal/orchestrator/pbs_api_apply.go` / `internal/orchestrator/pbs_notifications_api_apply.go` (API apply engines) ## Category System @@ -1511,7 +1510,6 @@ When restoring to the real system root (`/`), ProxSave avoids blindly overwritin - If the backup contains ProxSave inventory (`var/lib/proxsave-info/commands/system/{blkid.txt,lsblk_json.json,lsblk.txt}` or PBS datastore inventory), ProxSave can remap unstable device paths from the backup (e.g. `/dev/sdb1`) to stable references (`UUID=`/`PARTUUID=`/`LABEL=`) **when the stable reference exists on the restore host**. - This reduces the risk of mounting the wrong disk after a reinstall where `/dev/sdX` ordering changes. -- Note: backups taken from an **unprivileged container/rootless** environment may not include usable block-device inventory (for example `blkid` output can be empty/skipped). In that case, automated device remap is limited/unavailable and `/etc/fstab` entries may require manual review during restore. **Normalization**: - Entries written by the merge are normalized to include `nofail` (and `_netdev` for network mounts) to prevent offline storage from blocking boot/restore. diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 7f179e3..524fe09 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -168,35 +168,6 @@ COMPRESSION_TYPE=xz # Valid: xz, zstd, gzip, bzip2, lz4 --- -#### Notice: `SKIP ... Expected in unprivileged containers` (LXC/rootless) - -**Symptoms**: -- Running ProxSave inside an **unprivileged** LXC container (or a rootless container) produces log lines like: - - `SKIP Skipping Hardware DMI information: command \`dmidecode\` failed (...). Expected in unprivileged containers (...)` - - `SKIP Skipping Block device identifiers (blkid): command \`blkid\` failed (...). Expected in unprivileged containers (block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited)` - -**Cause**: In unprivileged containers, access to low-level system interfaces is intentionally restricted (for example `/dev/mem` and most block devices). Commands like `dmidecode` and `blkid` can fail even though the backup itself is working correctly. - -**Behavior**: -- ProxSave still attempts to collect the data. -- When the failure matches a known “missing privileges” pattern for a small allowlist of commands, it is logged as `SKIP` (info) instead of `WARNING`. -- Other failures are **not** downgraded and still appear as warnings/errors. - -**Impact**: -- Hardware inventory output may be missing/empty. -- If `blkid` is skipped, ProxSave restore may have **limited** ability to automatically remap `/etc/fstab` devices (UUID/PARTUUID/LABEL). You may need to review mounts manually during restore. 
- -**How to verify** (unprivileged user namespace mapping): -```bash -cat /proc/self/uid_map -cat /proc/self/gid_map -# If the second column is non-zero (e.g. "0 100000 65536"), you're in a shifted/unprivileged mapping. -``` - -**Optional**: If you want to hide `SKIP` lines on the console, run with `--log-level warning` (this also hides normal info logs). - ---- - ### 3. Cloud Storage Issues #### Error: `rclone not found in PATH` @@ -1052,6 +1023,9 @@ A: Update your configuration: `./build/proxsave --upgrade-config` **Q: Can I run backup while another backup is in progress?** A: No. Use a lock file (`BACKUP_PATH/.backup.lock`) to prevent concurrent runs. +**Q: Backup hangs during PVE datastore detection when a network storage is unreachable.** +A: Set `FS_IO_TIMEOUT` to cap how long proxsave waits for filesystem probes (stat/readdir/statfs), and `PVESH_TIMEOUT` to cap `pvesh` calls. This reduces the likelihood of indefinite hangs when a storage becomes unreachable mid-run. + **Q: How do I recover from a failed backup?** A: Delete the incomplete backup file and re-run. The system automatically handles cleanup. diff --git a/internal/backup/collector.go b/internal/backup/collector.go index 93edfe9..4704252 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -154,6 +154,7 @@ type CollectorConfig struct { BackupCephConfig bool CephConfigPath string PveshTimeoutSeconds int + FsIoTimeoutSeconds int // PBS-specific collection options BackupDatastoreConfigs bool @@ -288,6 +289,12 @@ func (c *CollectorConfig) Validate() error { if c.MaxPVEBackupSizeBytes < 0 { return fmt.Errorf("MAX_PVE_BACKUP_SIZE must be >= 0") } + if c.PveshTimeoutSeconds < 0 { + c.PveshTimeoutSeconds = 15 + } + if c.FsIoTimeoutSeconds < 0 { + c.FsIoTimeoutSeconds = 30 + } if c.SystemRootPrefix != "" && !filepath.IsAbs(c.SystemRootPrefix) { return fmt.Errorf("system root prefix must be an absolute path") } @@ -333,6 +340,7 @@ func GetDefaultCollectorConfig() *CollectorConfig { BackupCephConfig: true, CephConfigPath: "/etc/ceph", PveshTimeoutSeconds: 15, + FsIoTimeoutSeconds: 30, // PBS-specific (all enabled by default) BackupDatastoreConfigs: true, @@ -925,47 +933,25 @@ func (c *Collector) safeCmdOutput(ctx context.Context, cmd, output, description cmdString := strings.Join(cmdParts, " ") runCtx := ctx - cancel := func() {} + var cancel context.CancelFunc if cmdParts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) } - defer cancel() + if cancel != nil { + defer cancel() + } + out, err := c.depRunCommand(runCtx, cmdParts[0], cmdParts[1:]...) if err != nil { if critical { c.incFilesFailed() return fmt.Errorf("critical command `%s` failed for %s: %w (output: %s)", cmdString, description, err, summarizeCommandOutputText(string(out))) } - - exitCode := -1 - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - exitCode = exitErr.ExitCode() - } - outputText := strings.TrimSpace(string(out)) - if ctxInfo := c.depDetectUnprivilegedContainer(); ctxInfo.Detected { - if reason := privilegeSensitiveFailureReason(cmdParts[0], exitCode, outputText); reason != "" { - details := strings.TrimSpace(ctxInfo.Details) - if details != "" { - details = " (" + details + ")" - } - c.logger.Skip("Skipping %s: command `%s` failed (%v). Expected in unprivileged containers%s (%s). 
Non-critical; backup continues.", - description, - cmdString, - err, - details, - reason, - ) - c.logger.Debug("Skip details for %s: output: %s", description, summarizeCommandOutputText(outputText)) - return nil - } - } - c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Ensure the required CLI is available and has proper permissions. Output: %s", description, cmdString, err, - summarizeCommandOutputText(outputText), + summarizeCommandOutputText(string(out)), ) return nil // Non-critical failure } @@ -1260,11 +1246,13 @@ func (c *Collector) captureCommandOutput(ctx context.Context, cmd, output, descr } runCtx := ctx - cancel := func() {} + var cancel context.CancelFunc if parts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) } - defer cancel() + if cancel != nil { + defer cancel() + } out, err := c.depRunCommand(runCtx, parts[0], parts[1:]...) if err != nil { @@ -1298,24 +1286,6 @@ func (c *Collector) captureCommandOutput(ctx context.Context, cmd, output, descr } } - if ctxInfo := c.depDetectUnprivilegedContainer(); ctxInfo.Detected { - if reason := privilegeSensitiveFailureReason(parts[0], exitCode, outputText); reason != "" { - details := strings.TrimSpace(ctxInfo.Details) - if details != "" { - details = " (" + details + ")" - } - c.logger.Skip("Skipping %s: command `%s` failed (%v). Expected in unprivileged containers%s (%s). Non-critical; backup continues.", - description, - cmdString, - err, - details, - reason, - ) - c.logger.Debug("Skip details for %s: output: %s", description, summarizeCommandOutputText(outputText)) - return nil, nil - } - } - c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Output: %s", description, cmdString, diff --git a/internal/backup/collector_deps.go b/internal/backup/collector_deps.go index 6cb980a..cbb9aff 100644 --- a/internal/backup/collector_deps.go +++ b/internal/backup/collector_deps.go @@ -26,11 +26,10 @@ var ( // CollectorDeps allows injecting external dependencies for the Collector. 
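The tightened CollectorDeps above keeps external effects behind function-typed fields, which is what lets the coverage tests further down stub RunCommand and LookPath. A minimal sketch of the same injection pattern (deps and collectKernelVersion are simplified stand-ins, not ProxSave's API):

```go
// Illustrative sketch only (not part of this patch): function-typed
// dependencies make command collection stubbable in tests.
package main

import (
	"context"
	"fmt"
	"os/exec"
)

type deps struct {
	runCommand func(ctx context.Context, name string, args ...string) ([]byte, error)
}

func defaultDeps() deps {
	return deps{
		runCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) {
			return exec.CommandContext(ctx, name, args...).CombinedOutput()
		},
	}
}

func collectKernelVersion(ctx context.Context, d deps) (string, error) {
	out, err := d.runCommand(ctx, "uname", "-r")
	return string(out), err
}

func main() {
	// A test can swap in a stub that returns canned output without
	// executing anything on the host.
	stub := deps{runCommand: func(context.Context, string, ...string) ([]byte, error) {
		return []byte("6.8.0-test\n"), nil
	}}
	v, _ := collectKernelVersion(context.Background(), stub)
	fmt.Print(v)
}
```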
type CollectorDeps struct { - LookPath func(string) (string, error) - RunCommandWithEnv func(context.Context, []string, string, ...string) ([]byte, error) - RunCommand func(context.Context, string, ...string) ([]byte, error) - Stat func(string) (os.FileInfo, error) - DetectUnprivilegedContainer func() (bool, string) + LookPath func(string) (string, error) + RunCommandWithEnv func(context.Context, []string, string, ...string) ([]byte, error) + RunCommand func(context.Context, string, ...string) ([]byte, error) + Stat func(string) (os.FileInfo, error) } func defaultCollectorDeps() CollectorDeps { diff --git a/internal/backup/collector_pbs_commands_coverage_test.go b/internal/backup/collector_pbs_commands_coverage_test.go index 28bb64c..11011a5 100644 --- a/internal/backup/collector_pbs_commands_coverage_test.go +++ b/internal/backup/collector_pbs_commands_coverage_test.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/tis24dev/proxsave/internal/pbs" "github.com/tis24dev/proxsave/internal/types" @@ -50,18 +51,18 @@ func TestCollectPBSCommandsWritesExpectedOutputs(t *testing.T) { "datastore_store1_status.json", "acme_accounts.json", "acme_plugins.json", - "notification_targets.json", - "notification_matchers.json", - "notification_endpoints_smtp.json", - "notification_endpoints_sendmail.json", - "notification_endpoints_gotify.json", - "notification_endpoints_webhook.json", - "notifications_summary.json", - "user_list.json", - "realms_ldap.json", - "realms_ad.json", - "realms_openid.json", - "acl_list.json", + "notification_targets.json", + "notification_matchers.json", + "notification_endpoints_smtp.json", + "notification_endpoints_sendmail.json", + "notification_endpoints_gotify.json", + "notification_endpoints_webhook.json", + "notifications_summary.json", + "user_list.json", + "realms_ldap.json", + "realms_ad.json", + "realms_openid.json", + "acl_list.json", "remote_list.json", "sync_jobs.json", "verification_jobs.json", @@ -206,7 +207,7 @@ func TestCollectPBSPxarMetadataReturnsErrorWhenTempVarIsFile(t *testing.T) { func TestCollectDatastoreConfigsCreatesConfigAndNamespaceFiles(t *testing.T) { origList := listNamespacesFunc t.Cleanup(func() { listNamespacesFunc = origList }) - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: "root", Path: "/"}}, false, nil } @@ -314,7 +315,7 @@ func TestCollectPBSConfigsEndToEndWithStubs(t *testing.T) { origList := listNamespacesFunc t.Cleanup(func() { listNamespacesFunc = origList }) - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: "root", Path: "/"}}, false, nil } diff --git a/internal/backup/collector_pbs_datastore.go b/internal/backup/collector_pbs_datastore.go index 1b3d02c..77f427e 100644 --- a/internal/backup/collector_pbs_datastore.go +++ b/internal/backup/collector_pbs_datastore.go @@ -13,6 +13,7 @@ import ( "time" "github.com/tis24dev/proxsave/internal/pbs" + "github.com/tis24dev/proxsave/internal/safefs" ) type pbsDatastore struct { @@ -45,7 +46,7 @@ func (c *Collector) collectDatastoreConfigs(ctx context.Context, datastores []pb false) // Get namespace list using CLI/Filesystem fallback - if err := c.collectDatastoreNamespaces(ds, datastoreDir); err != nil { + if err := 
c.collectDatastoreNamespaces(ctx, ds, datastoreDir); err != nil { c.logger.Debug("Failed to collect namespaces for datastore %s: %v", ds.Name, err) } } @@ -56,7 +57,7 @@ func (c *Collector) collectDatastoreConfigs(ctx context.Context, datastores []pb // collectDatastoreNamespaces collects namespace information for a datastore // using CLI first, then filesystem fallback. -func (c *Collector) collectDatastoreNamespaces(ds pbsDatastore, datastoreDir string) error { +func (c *Collector) collectDatastoreNamespaces(ctx context.Context, ds pbsDatastore, datastoreDir string) error { c.logger.Debug("Collecting namespaces for datastore %s (path: %s)", ds.Name, ds.Path) // Write location is deterministic; if excluded, skip the whole operation. outputPath := filepath.Join(datastoreDir, fmt.Sprintf("%s_namespaces.json", ds.Name)) @@ -65,7 +66,12 @@ func (c *Collector) collectDatastoreNamespaces(ds pbsDatastore, datastoreDir str return nil } - namespaces, fromFallback, err := listNamespacesFunc(ds.Name, ds.Path) + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + + namespaces, fromFallback, err := listNamespacesFunc(ctx, ds.Name, ds.Path, ioTimeout) if err != nil { return err } @@ -190,9 +196,22 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return nil } - stat, err := os.Stat(ds.Path) - if err != nil || !stat.IsDir() { - c.logger.Debug("Skipping PXAR metadata for datastore %s (path not accessible: %s)", ds.Name, ds.Path) + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + + stat, err := safefs.Stat(ctx, ds.Path, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): filesystem probe timed out (%v)", ds.Name, ds.Path, err) + return nil + } + c.logger.Debug("Skipping PXAR metadata for datastore %s (path not accessible: %s): %v", ds.Name, ds.Path, err) + return nil + } + if !stat.IsDir() { + c.logger.Debug("Skipping PXAR metadata for datastore %s (path not a directory: %s)", ds.Name, ds.Path) return nil } @@ -229,7 +248,10 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m ScannedAt: time.Now(), } - if dirs, err := c.sampleDirectories(ctx, ds.Path, 2, 30); err == nil && len(dirs) > 0 { + if dirs, err := c.sampleDirectoriesBounded(ctx, ds.Path, 2, 30, ioTimeout); errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): directory sampling timed out (%v)", ds.Name, ds.Path, err) + return nil + } else if err == nil && len(dirs) > 0 { meta.SampleDirectories = dirs c.logger.Debug("PXAR: datastore %s -> selected %d sample directories", ds.Name, len(dirs)) } else if err != nil { @@ -241,7 +263,10 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m includePatterns = []string{"*.pxar", "*.pxar.*", "catalog.pxar", "catalog.pxar.*"} } excludePatterns := c.config.PxarFileExcludePatterns - if files, err := c.sampleFiles(ctx, ds.Path, includePatterns, excludePatterns, 8, 200); err == nil && len(files) > 0 { + if files, err := c.sampleFilesBounded(ctx, ds.Path, includePatterns, excludePatterns, 8, 200, ioTimeout); errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): file sampling timed out (%v)", ds.Name, 
ds.Path, err) + return nil + } else if err == nil && len(files) > 0 { meta.SamplePxarFiles = files c.logger.Debug("PXAR: datastore %s -> selected %d sample pxar files", ds.Name, len(files)) } else if err != nil { @@ -257,15 +282,27 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return err } - if err := c.writePxarSubdirReport(filepath.Join(dsDir, fmt.Sprintf("%s_subdirs.txt", ds.Name)), ds); err != nil { + if err := c.writePxarSubdirReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_subdirs.txt", ds.Name)), ds, ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): subdir report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } - if err := c.writePxarListReport(filepath.Join(dsDir, fmt.Sprintf("%s_vm_pxar_list.txt", ds.Name)), ds, "vm"); err != nil { + if err := c.writePxarListReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_vm_pxar_list.txt", ds.Name)), ds, "vm", ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): VM list report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } - if err := c.writePxarListReport(filepath.Join(dsDir, fmt.Sprintf("%s_ct_pxar_list.txt", ds.Name)), ds, "ct"); err != nil { + if err := c.writePxarListReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_ct_pxar_list.txt", ds.Name)), ds, "ct", ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): CT list report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } @@ -273,14 +310,17 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return nil } -func (c *Collector) writePxarSubdirReport(target string, ds pbsDatastore) error { +func (c *Collector) writePxarSubdirReport(ctx context.Context, target string, ds pbsDatastore, ioTimeout time.Duration) error { c.logger.Debug("Writing PXAR subdirectory report for datastore %s", ds.Name) var builder strings.Builder builder.WriteString(fmt.Sprintf("# Datastore subdirectories in %s generated on %s\n", ds.Path, time.Now().Format(time.RFC1123))) builder.WriteString(fmt.Sprintf("# Datastore: %s\n", ds.Name)) - entries, err := os.ReadDir(ds.Path) + entries, err := safefs.ReadDir(ctx, ds.Path, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } builder.WriteString(fmt.Sprintf("# Unable to read datastore path: %v\n", err)) return c.writeReportFile(target, []byte(builder.String())) } @@ -305,7 +345,7 @@ func (c *Collector) writePxarSubdirReport(target string, ds pbsDatastore) error return nil } -func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir string) error { +func (c *Collector) writePxarListReport(ctx context.Context, target string, ds pbsDatastore, subDir string, ioTimeout time.Duration) error { c.logger.Debug("Writing PXAR file list for datastore %s subdir %s", ds.Name, subDir) basePath := filepath.Join(ds.Path, subDir) @@ -314,8 +354,11 @@ func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir s builder.WriteString(fmt.Sprintf("# Datastore: %s, Subdirectory: %s\n", ds.Name, subDir)) builder.WriteString("# Format: permissions size date name\n") - entries, err := os.ReadDir(basePath) + entries, err := safefs.ReadDir(ctx, basePath, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } 
builder.WriteString(fmt.Sprintf("# Unable to read directory: %v\n", err)) if writeErr := c.writeReportFile(target, []byte(builder.String())); writeErr != nil { return writeErr @@ -339,8 +382,13 @@ func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir s if !strings.HasSuffix(entry.Name(), ".pxar") { continue } - info, err := entry.Info() + + fullPath := filepath.Join(basePath, entry.Name()) + info, err := safefs.Stat(ctx, fullPath, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } continue } files = append(files, infoEntry{ diff --git a/internal/backup/collector_pbs_extra_test.go b/internal/backup/collector_pbs_extra_test.go index d80dae4..bdb822f 100644 --- a/internal/backup/collector_pbs_extra_test.go +++ b/internal/backup/collector_pbs_extra_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/tis24dev/proxsave/internal/pbs" "github.com/tis24dev/proxsave/internal/types" @@ -95,7 +96,7 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { } origList := listNamespacesFunc - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(_ context.Context, name, path string, _ time.Duration) ([]pbs.Namespace, bool, error) { if name != ds.Name || path != ds.Path { t.Fatalf("unexpected args %s %s", name, path) } @@ -103,7 +104,7 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { } t.Cleanup(func() { listNamespacesFunc = origList }) - if err := c.collectDatastoreNamespaces(ds, targetDir); err != nil { + if err := c.collectDatastoreNamespaces(context.Background(), ds, targetDir); err != nil { t.Fatalf("collectDatastoreNamespaces error: %v", err) } nsPath := filepath.Join(targetDir, "store_namespaces.json") @@ -111,10 +112,10 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { t.Fatalf("expected namespaces file, got %v", err) } - listNamespacesFunc = func(string, string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return nil, false, errors.New("fail") } - if err := c.collectDatastoreNamespaces(ds, targetDir); err == nil { + if err := c.collectDatastoreNamespaces(context.Background(), ds, targetDir); err == nil { t.Fatalf("expected error when namespace listing fails") } } diff --git a/internal/backup/collector_pbs_test.go b/internal/backup/collector_pbs_test.go index 61180ee..e44fddf 100644 --- a/internal/backup/collector_pbs_test.go +++ b/internal/backup/collector_pbs_test.go @@ -253,7 +253,7 @@ func TestHasTapeSupportHasDrives(t *testing.T) { } func TestCollectDatastoreNamespacesSuccess(t *testing.T) { - stubListNamespaces(t, func(name, path string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(_ context.Context, name, path string, _ time.Duration) ([]pbs.Namespace, bool, error) { if name != "store1" || path != "/fake" { t.Fatalf("unexpected datastore %s %s", name, path) } @@ -270,7 +270,7 @@ func TestCollectDatastoreNamespacesSuccess(t *testing.T) { } ds := pbsDatastore{Name: "store1", Path: "/fake"} - if err := collector.collectDatastoreNamespaces(ds, dsDir); err != nil { + if err := collector.collectDatastoreNamespaces(context.Background(), ds, dsDir); err != nil { t.Fatalf("collectDatastoreNamespaces failed: %v", err) } @@ -289,7 +289,7 @@ func TestCollectDatastoreNamespacesSuccess(t *testing.T) { } func TestCollectDatastoreNamespacesError(t *testing.T) { - stubListNamespaces(t, func(string, 
string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return nil, false, fmt.Errorf("boom") }) @@ -299,14 +299,14 @@ func TestCollectDatastoreNamespacesError(t *testing.T) { t.Fatalf("failed to create datastore dir: %v", err) } - err := collector.collectDatastoreNamespaces(pbsDatastore{Name: "store1"}, dsDir) + err := collector.collectDatastoreNamespaces(context.Background(), pbsDatastore{Name: "store1"}, dsDir) if err == nil || !strings.Contains(err.Error(), "boom") { t.Fatalf("expected error from list namespaces, got %v", err) } } func TestCollectDatastoreConfigsDryRun(t *testing.T) { - stubListNamespaces(t, func(string, string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: ""}}, false, nil }) @@ -395,7 +395,7 @@ func TestCollectUserConfigsMissingUserList(t *testing.T) { } } -func stubListNamespaces(t *testing.T, fn func(string, string) ([]pbs.Namespace, bool, error)) { +func stubListNamespaces(t *testing.T, fn func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error)) { t.Helper() orig := listNamespacesFunc listNamespacesFunc = fn diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 910ec74..766afa7 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -11,8 +11,9 @@ import ( "path/filepath" "sort" "strings" - "syscall" "time" + + "github.com/tis24dev/proxsave/internal/safefs" ) type pveStorageEntry struct { @@ -660,6 +661,7 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv c.logger.Debug("Skipping cluster runtime commands: BACKUP_CLUSTER_CONFIG=false (clustered=%v)", clustered) } + // Storage status hostname, _ := os.Hostname() nodeName := shortHostname(hostname) if nodeName == "" { @@ -718,47 +720,50 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv } func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { - var raw []struct { - Storage string `json:"storage"` - Name string `json:"name"` - Path string `json:"path"` - Type string `json:"type"` - Content string `json:"content"` - Active json.RawMessage `json:"active"` - Enabled json.RawMessage `json:"enabled"` - Status string `json:"status"` - } - if err := json.Unmarshal(data, &raw); err != nil { - return nil, err - } - - parseBool := func(raw json.RawMessage) *bool { - if len(raw) == 0 { + parseOptionalBool := func(value any) *bool { + if value == nil { return nil } - var b bool - if err := json.Unmarshal(raw, &b); err == nil { + switch v := value.(type) { + case bool: + b := v return &b - } - var i int - if err := json.Unmarshal(raw, &i); err == nil { - v := i != 0 - return &v - } - var s string - if err := json.Unmarshal(raw, &s); err == nil { - switch strings.ToLower(strings.TrimSpace(s)) { + case float64: + b := v != 0 + return &b + case string: + s := strings.ToLower(strings.TrimSpace(v)) + if s == "" { + return nil + } + switch s { case "1", "true", "yes", "on": - v := true - return &v + b := true + return &b case "0", "false", "no", "off": - v := false - return &v + b := false + return &b + default: + return nil } + default: + return nil } - return nil } + var raw []struct { + Storage string `json:"storage"` + Name string `json:"name"` + Path string `json:"path"` + Type string `json:"type"` + Content string `json:"content"` + Active any 
`json:"active"` + Enabled any `json:"enabled"` + Status string `json:"status"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return nil, err + } seen := make(map[string]struct{}) entries := make([]pveStorageEntry, 0, len(raw)) for _, item := range raw { @@ -778,8 +783,8 @@ func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { Path: strings.TrimSpace(item.Path), Type: strings.TrimSpace(item.Type), Content: strings.TrimSpace(item.Content), - Active: parseBool(item.Active), - Enabled: parseBool(item.Enabled), + Active: parseOptionalBool(item.Active), + Enabled: parseOptionalBool(item.Enabled), Status: strings.TrimSpace(item.Status), }) } @@ -1026,23 +1031,28 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv summary.WriteString(time.Now().Format(time.RFC3339)) summary.WriteString("\n# Format: NAME|PATH|TYPE|CONTENT\n\n") - processed := 0 + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + formatRuntime := func(storage pveStorageEntry) string { parts := make([]string, 0, 3) - if status := strings.TrimSpace(storage.Status); status != "" { - parts = append(parts, "status="+status) - } if storage.Active != nil { parts = append(parts, fmt.Sprintf("active=%v", *storage.Active)) } if storage.Enabled != nil { parts = append(parts, fmt.Sprintf("enabled=%v", *storage.Enabled)) } + if status := strings.TrimSpace(storage.Status); status != "" { + parts = append(parts, "status="+status) + } if len(parts) == 0 { return "" } return " (" + strings.Join(parts, " ") + ")" } + unavailableReason := func(storage pveStorageEntry) string { if storage.Enabled != nil && !*storage.Enabled { return "enabled=false" @@ -1062,6 +1072,8 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv } return "" } + + processed := 0 for _, storage := range storages { if storage.Path == "" { continue @@ -1069,16 +1081,24 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv if err := ctx.Err(); err != nil { return err } + if reason := unavailableReason(storage); reason != "" { c.logger.Warning("Skipping datastore %s (path=%s)%s: not available (%s)", storage.Name, storage.Path, formatRuntime(storage), reason) continue } - // NOTE: os.Stat() on an unreachable mount can hang inside the kernel. - // The availability filter above reduces the likelihood by skipping inactive/unavailable storages reported by PVE. 
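The removed comment above names the underlying problem: a plain os.Stat on an unreachable network mount can block indefinitely inside the kernel, and the safefs calls introduced in this hunk bound each probe instead. A minimal sketch of how such a bounded stat can be built with a goroutine and select (ErrTimeout and statWithTimeout are hypothetical names; the real safefs package may be shaped differently):

```go
// Illustrative sketch only (not part of this patch): a timeout-bounded stat
// probe in the spirit of the safefs helpers.
package main

import (
	"context"
	"errors"
	"fmt"
	"os"
	"time"
)

var ErrTimeout = errors.New("filesystem probe timed out")

func statWithTimeout(ctx context.Context, path string, timeout time.Duration) (os.FileInfo, error) {
	if timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, timeout)
		defer cancel()
	}
	type result struct {
		info os.FileInfo
		err  error
	}
	ch := make(chan result, 1) // buffered: the goroutine can always deliver and exit
	go func() {
		info, err := os.Stat(path) // may block in the kernel on a dead network mount
		ch <- result{info, err}
	}()
	select {
	case r := <-ch:
		return r.info, r.err
	case <-ctx.Done():
		return nil, fmt.Errorf("%w: %s", ErrTimeout, path)
	}
}

func main() {
	info, err := statWithTimeout(context.Background(), "/tmp", 30*time.Second)
	fmt.Println(info != nil, err)
}
```

Note the limitation of this pattern: the probing goroutine can stay blocked in the kernel until the syscall eventually returns; the timeout only frees the caller, which is why the collector logs a warning and skips the storage rather than retrying it.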
- c.logger.Info("Processing datastore %s (path=%s)%s", storage.Name, storage.Path, formatRuntime(storage)) - if stat, err := os.Stat(storage.Path); err != nil || !stat.IsDir() { - c.logger.Debug("Skipping datastore %s (path not accessible: %s)", storage.Name, storage.Path) + c.logger.Info("Probing datastore %s (path=%s)%s", storage.Name, storage.Path, formatRuntime(storage)) + stat, err := safefs.Stat(ctx, storage.Path, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: filesystem probe timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), err) + } else { + c.logger.Debug("Skipping datastore %s (path not accessible: %s): %v", storage.Name, storage.Path, err) + } + continue + } + if !stat.IsDir() { + c.logger.Debug("Skipping datastore %s (path not a directory: %s)", storage.Name, storage.Path) continue } @@ -1112,7 +1132,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv ScannedAt: time.Now(), } - dirSamples, dirSampleErr := c.sampleDirectories(ctx, storage.Path, 2, 20) + dirSamples, dirSampleErr := c.sampleDirectoriesBounded(ctx, storage.Path, 2, 20, ioTimeout) + if errors.Is(dirSampleErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: directory sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), dirSampleErr) + continue + } if dirSampleErr != nil { c.logger.Debug("Directory sample for datastore %s failed: %v", storage.Name, dirSampleErr) } @@ -1120,7 +1144,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv meta.SampleDirectories = dirSamples } - diskUsageText, diskUsageErr := c.describeDiskUsage(storage.Path) + diskUsageText, diskUsageErr := c.describeDiskUsage(ctx, storage.Path, ioTimeout) + if errors.Is(diskUsageErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: disk usage probe timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), diskUsageErr) + continue + } if diskUsageErr != nil { c.logger.Debug("Disk usage summary for %s failed: %v", storage.Name, diskUsageErr) } else { @@ -1137,7 +1165,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv } excludePatterns := c.config.PxarFileExcludePatterns - fileSummaries, sampleFileErr := c.sampleFiles(ctx, storage.Path, includePatterns, excludePatterns, 3, 100) + fileSummaries, sampleFileErr := c.sampleFilesBounded(ctx, storage.Path, includePatterns, excludePatterns, 3, 100, ioTimeout) + if errors.Is(sampleFileErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: file sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), sampleFileErr) + continue + } if sampleFileErr != nil { c.logger.Debug("Backup file sample for %s failed: %v", storage.Name, sampleFileErr) } else if len(fileSummaries) > 0 { @@ -1153,7 +1185,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv return err } - fileSampleLines, fileSampleErr := c.sampleMetadataFileStats(ctx, storage.Path, 3, 10) + fileSampleLines, fileSampleErr := c.sampleMetadataFileStats(ctx, storage.Path, 3, 10, ioTimeout) + if errors.Is(fileSampleErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: metadata sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), fileSampleErr) + continue + } if fileSampleErr != nil { c.logger.Debug("General file sampling for %s failed: %v", storage.Name, 
fileSampleErr) } @@ -1164,7 +1200,7 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv if c.config.BackupPVEBackupFiles { c.logger.Info("Analyzing PVE backup files in datastore: %s", storage.Name) - if err := c.collectDetailedPVEBackups(ctx, storage, metaDir); err != nil { + if err := c.collectDetailedPVEBackups(ctx, storage, metaDir, ioTimeout); err != nil { c.logger.Warning("Detailed backup analysis for %s failed: %v", storage.Name, err) } } else { @@ -1183,7 +1219,7 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv return nil } -func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveStorageEntry, metaDir string) error { +func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveStorageEntry, metaDir string, ioTimeout time.Duration) error { if err := ctx.Err(); err != nil { return err } @@ -1234,56 +1270,86 @@ func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveSt } } - walkErr := filepath.WalkDir(storage.Path, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - c.logger.Debug("Skipping %s: %v", path, walkErr) - return nil - } + type dirItem struct { + path string + } + stack := []dirItem{{path: storage.Path}} + + for len(stack) > 0 { if err := ctx.Err(); err != nil { return err } - if d.IsDir() { - return nil + + item := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if c.shouldExclude(item.path) { + continue } - info, err := d.Info() + entries, err := safefs.ReadDir(ctx, item.path, ioTimeout) if err != nil { - c.logger.Debug("Failed to stat %s: %v", path, err) - return nil + if errors.Is(err, safefs.ErrTimeout) { + return err + } + c.logger.Debug("Skipping %s: %v", item.path, err) + continue } - base := filepath.Base(path) - matched := false - for _, w := range writers { - if matchPattern(base, w.pattern) { - matched = true - if err := w.Write(path, info); err != nil { - c.logger.Debug("Failed to log %s for pattern %s: %v", path, w.pattern, err) + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } + + name := entry.Name() + fullPath := filepath.Join(item.path, name) + if c.shouldExclude(fullPath) { + continue + } + if entry.IsDir() { + stack = append(stack, dirItem{path: fullPath}) + continue + } + + matchedWriters := make([]*patternWriter, 0, 2) + for _, w := range writers { + if matchPattern(name, w.pattern) { + matchedWriters = append(matchedWriters, w) } } - } + if len(matchedWriters) == 0 { + continue + } - if !matched { - return nil - } + info, err := safefs.Stat(ctx, fullPath, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } + c.logger.Debug("Failed to stat %s: %v", fullPath, err) + continue + } - totalFiles++ - totalSize += info.Size() + for _, w := range matchedWriters { + if err := w.Write(fullPath, info); err != nil { + c.logger.Debug("Failed to log %s for pattern %s: %v", fullPath, w.pattern, err) + } + } + + totalFiles++ + totalSize += info.Size() - if smallDir != "" && info.Size() <= c.config.MaxPVEBackupSizeBytes { - if err := c.copyBackupSample(ctx, path, smallDir, fmt.Sprintf("small PVE backup %s", filepath.Base(path))); err != nil { - c.logger.Debug("Failed to copy small backup %s: %v", path, err) + if smallDir != "" && info.Size() <= c.config.MaxPVEBackupSizeBytes { + if err := c.copyBackupSample(ctx, fullPath, smallDir, fmt.Sprintf("small PVE backup %s", name)); err != nil { + c.logger.Debug("Failed to copy small backup %s: 
%v", fullPath, err) + } } - } - if includeDir != "" && strings.Contains(path, includePattern) { - if err := c.copyBackupSample(ctx, path, includeDir, fmt.Sprintf("selected PVE backup %s", filepath.Base(path))); err != nil { - c.logger.Debug("Failed to copy pattern backup %s: %v", path, err) + if includeDir != "" && strings.Contains(fullPath, includePattern) { + if err := c.copyBackupSample(ctx, fullPath, includeDir, fmt.Sprintf("selected PVE backup %s", name)); err != nil { + c.logger.Debug("Failed to copy pattern backup %s: %v", fullPath, err) + } } } - return nil - }) - if walkErr != nil { - return walkErr } if err := c.writePatternSummary(storage, analysisDir, writers, totalFiles, totalSize); err != nil { @@ -1853,9 +1919,146 @@ func (c *Collector) parseStorageConfigEntries() []pveStorageEntry { return entries } -func (c *Collector) describeDiskUsage(path string) (string, error) { - var stat syscall.Statfs_t - if err := syscall.Statfs(path, &stat); err != nil { +func (c *Collector) sampleDirectoriesBounded(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { + results := make([]string, 0, limit) + if limit <= 0 || maxDepth <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + if !entry.IsDir() { + continue + } + child := filepath.Join(dirPath, entry.Name()) + if c.shouldExclude(child) { + continue + } + + rel, relErr := filepath.Rel(root, child) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + + results = append(results, rel) + if len(results) >= limit { + break + } + if depth < maxDepth-1 { + stack = append(stack, child) + } + } + } + + return results, nil +} + +func (c *Collector) sampleFilesBounded(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int, ioTimeout time.Duration) ([]FileSummary, error) { + results := make([]FileSummary, 0, limit) + if limit <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + + name := entry.Name() + full := filepath.Join(dirPath, name) + if c.shouldExclude(full) { + continue + } + + if entry.IsDir() { + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + stack = append(stack, full) + continue + } + + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." 
|| strings.HasPrefix(rel, "..") { + continue + } + + if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, name, rel) { + continue + } + if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, name, rel) { + continue + } + + info, err := safefs.Stat(ctx, full, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return results, err + } + continue + } + + results = append(results, FileSummary{ + RelativePath: filepath.ToSlash(rel), + SizeBytes: info.Size(), + SizeHuman: FormatBytes(info.Size()), + ModTime: info.ModTime(), + }) + if len(results) >= limit { + break + } + } + } + + return results, nil +} + +func (c *Collector) describeDiskUsage(ctx context.Context, path string, ioTimeout time.Duration) (string, error) { + stat, err := safefs.Statfs(ctx, path, ioTimeout) + if err != nil { return "", err } total := int64(stat.Blocks) * int64(stat.Bsize) @@ -1871,50 +2074,64 @@ func (c *Collector) describeDiskUsage(path string) (string, error) { ), nil } -func (c *Collector) sampleMetadataFileStats(ctx context.Context, root string, maxDepth, limit int) ([]string, error) { +func (c *Collector) sampleMetadataFileStats(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { lines := make([]string, 0, limit) - if limit <= 0 { + if limit <= 0 || maxDepth <= 0 { return lines, nil } root = filepath.Clean(root) - stopErr := errors.New("metadata sample limit reached") + type dirItem struct { + path string + depth int + } + stack := []dirItem{{path: root, depth: 0}} - err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } + for len(stack) > 0 && len(lines) < limit { if err := ctx.Err(); err != nil { - return err - } - depth := relativeDepth(root, path) - if d.IsDir() { - if depth >= maxDepth { - return filepath.SkipDir - } - return nil + return lines, err } - info, err := d.Info() + item := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, item.path, ioTimeout) if err != nil { - return nil + return lines, err } - line := fmt.Sprintf("%s %d %s", - info.ModTime().Format(time.RFC3339), - info.Size(), - path, - ) - lines = append(lines, line) - if len(lines) >= limit { - return stopErr - } - return nil - }) + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return lines, err + } - if err != nil && !errors.Is(err, stopErr) { - return lines, err + full := filepath.Join(item.path, entry.Name()) + if entry.IsDir() { + if item.depth+1 >= maxDepth { + continue + } + stack = append(stack, dirItem{path: full, depth: item.depth + 1}) + continue + } + + info, err := safefs.Stat(ctx, full, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return lines, err + } + continue + } + lines = append(lines, fmt.Sprintf("%s %d %s", + info.ModTime().Format(time.RFC3339), + info.Size(), + full, + )) + if len(lines) >= limit { + break + } + } } + return lines, nil } diff --git a/internal/backup/collector_pve_additional_test.go b/internal/backup/collector_pve_additional_test.go index 92ae4c6..5b5ef98 100644 --- a/internal/backup/collector_pve_additional_test.go +++ b/internal/backup/collector_pve_additional_test.go @@ -166,7 +166,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil 
{ t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -201,7 +201,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() // Cancel immediately - err := collector.collectDetailedPVEBackups(ctx, storage, metaDir) + err := collector.collectDetailedPVEBackups(ctx, storage, metaDir, 0) if err == nil || err != context.Canceled { t.Errorf("expected context.Canceled error, got: %v", err) } @@ -229,7 +229,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -266,7 +266,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -305,7 +305,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -339,7 +339,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -368,7 +368,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -504,7 +504,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -535,7 +535,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 2, 100) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 2, 100, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -554,7 +554,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -577,7 +577,7 @@ func TestSampleMetadataFileStats(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) 
cancel() - _, err := collector.sampleMetadataFileStats(ctx, tmpDir, 3, 10) + _, err := collector.sampleMetadataFileStats(ctx, tmpDir, 3, 10, 0) if err == nil { t.Error("expected context cancelled error") } @@ -593,7 +593,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 0) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 0, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } diff --git a/internal/backup/collector_pve_parse_test.go b/internal/backup/collector_pve_parse_test.go index 39e18b6..aaea755 100644 --- a/internal/backup/collector_pve_parse_test.go +++ b/internal/backup/collector_pve_parse_test.go @@ -8,7 +8,6 @@ import ( // TestParseNodeStorageList tests parsing PVE storage entries from JSON func TestParseNodeStorageList(t *testing.T) { - boolPtr := func(v bool) *bool { return &v } tests := []struct { name string input string @@ -100,28 +99,16 @@ func TestParseNodeStorageList(t *testing.T) { { name: "mixed valid and empty entries", input: `[ - {"storage": "storage1", "path": "/path1", "type": "dir", "content": "iso"}, - {"storage": "", "path": "/path2", "type": "dir", "content": "backup"}, - {"storage": "storage2", "path": "/path3", "type": "nfs", "content": "images"} - ]`, + {"storage": "storage1", "path": "/path1", "type": "dir", "content": "iso"}, + {"storage": "", "path": "/path2", "type": "dir", "content": "backup"}, + {"storage": "storage2", "path": "/path3", "type": "nfs", "content": "images"} + ]`, expectError: false, expected: []pveStorageEntry{ {Name: "storage1", Path: "/path1", Type: "dir", Content: "iso"}, {Name: "storage2", Path: "/path3", Type: "nfs", Content: "images"}, }, }, - { - name: "runtime status fields", - input: `[ - {"storage": "nfs-backup", "path": "/mnt/backup", "type": "nfs", "content": "backup", "active": 0, "enabled": 1, "status": "unknown"}, - {"storage": "local", "path": "/var/lib/vz", "type": "dir", "content": "iso", "active": true, "enabled": "true", "status": "available"} - ]`, - expectError: false, - expected: []pveStorageEntry{ - {Name: "nfs-backup", Path: "/mnt/backup", Type: "nfs", Content: "backup", Active: boolPtr(false), Enabled: boolPtr(true), Status: "unknown"}, - {Name: "local", Path: "/var/lib/vz", Type: "dir", Content: "iso", Active: boolPtr(true), Enabled: boolPtr(true), Status: "available"}, - }, - }, } for _, tt := range tests { @@ -158,20 +145,34 @@ func TestParseNodeStorageList(t *testing.T) { if entry.Content != tt.expected[i].Content { t.Errorf("entry[%d].Content = %q, want %q", i, entry.Content, tt.expected[i].Content) } - if got, want := entry.Status, tt.expected[i].Status; got != want { - t.Errorf("entry[%d].Status = %q, want %q", i, got, want) - } - if got, want := entry.Active, tt.expected[i].Active; (got == nil) != (want == nil) || (got != nil && want != nil && *got != *want) { - t.Errorf("entry[%d].Active = %v, want %v", i, got, want) - } - if got, want := entry.Enabled, tt.expected[i].Enabled; (got == nil) != (want == nil) || (got != nil && want != nil && *got != *want) { - t.Errorf("entry[%d].Enabled = %v, want %v", i, got, want) - } } }) } } +func TestParseNodeStorageList_RuntimeFields(t *testing.T) { + input := `[ + {"storage": "nfs1", "path": "/mnt/nfs", "type": "nfs", "content": "backup", "active": 1, "enabled": 0, "status": "available"} + ]` + result, err := 
parseNodeStorageList([]byte(input)) + if err != nil { + t.Fatalf("parseNodeStorageList() unexpected error = %v", err) + } + if len(result) != 1 { + t.Fatalf("parseNodeStorageList() returned %d entries, want 1", len(result)) + } + got := result[0] + if got.Active == nil || *got.Active != true { + t.Fatalf("entry.Active = %#v; want true", got.Active) + } + if got.Enabled == nil || *got.Enabled != false { + t.Fatalf("entry.Enabled = %#v; want false", got.Enabled) + } + if got.Status != "available" { + t.Fatalf("entry.Status = %q; want %q", got.Status, "available") + } +} + // TestParseStorageConfigEntries tests parsing PVE storage.cfg file func TestParseStorageConfigEntries(t *testing.T) { tests := []struct { diff --git a/internal/backup/collector_pxar_datastore_test.go b/internal/backup/collector_pxar_datastore_test.go index 6810fe8..ee63819 100644 --- a/internal/backup/collector_pxar_datastore_test.go +++ b/internal/backup/collector_pxar_datastore_test.go @@ -37,7 +37,7 @@ func TestWritePxarListReportWithFiles(t *testing.T) { ds := pbsDatastore{Name: "ds1", Path: filepath.Join(tmp, "ds1")} target := filepath.Join(tmp, "list.txt") c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarListReport(target, ds, "ct"); err != nil { + if err := c.writePxarListReport(context.Background(), target, ds, "ct", 0); err != nil { t.Fatalf("writePxarListReport: %v", err) } content, err := os.ReadFile(target) diff --git a/internal/backup/collector_pxar_reports_test.go b/internal/backup/collector_pxar_reports_test.go index 4439d25..e22d903 100644 --- a/internal/backup/collector_pxar_reports_test.go +++ b/internal/backup/collector_pxar_reports_test.go @@ -1,6 +1,7 @@ package backup import ( + "context" "os" "path/filepath" "testing" @@ -16,7 +17,7 @@ func TestWritePxarSubdirReportHandlesMissingPath(t *testing.T) { ds := pbsDatastore{Name: "ds1", Path: filepath.Join(tmp, "missing")} c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarSubdirReport(target, ds); err != nil { + if err := c.writePxarSubdirReport(context.Background(), target, ds, 0); err != nil { t.Fatalf("writePxarSubdirReport error: %v", err) } content, err := os.ReadFile(target) @@ -39,7 +40,7 @@ func TestWritePxarListReportNoFiles(t *testing.T) { target := filepath.Join(tmp, "list.txt") c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarListReport(target, ds, "vm"); err != nil { + if err := c.writePxarListReport(context.Background(), target, ds, "vm", 0); err != nil { t.Fatalf("writePxarListReport error: %v", err) } content, err := os.ReadFile(target) diff --git a/internal/backup/optimizations.go b/internal/backup/optimizations.go index 5797495..f31943f 100644 --- a/internal/backup/optimizations.go +++ b/internal/backup/optimizations.go @@ -6,15 +6,10 @@ import ( "crypto/sha256" "encoding/json" "fmt" - "hash" "io" "os" "path/filepath" - "sort" - "strconv" "strings" - "syscall" - "time" "github.com/tis24dev/proxsave/internal/logging" ) @@ -28,18 +23,6 @@ const ( defaultChunkFilePerm = 0o640 ) -type chunkedFileMetadata struct { - Version int `json:"version"` - SizeBytes int64 `json:"size_bytes"` - ChunkSizeBytes int64 `json:"chunk_size_bytes"` - ChunkCount int `json:"chunk_count"` - SHA256 string `json:"sha256,omitempty"` - Mode uint32 `json:"mode"` - UID int `json:"uid"` - GID int `json:"gid"` - ModTimeUnixNano int64 `json:"mod_time_unix_nano"` -} - // 
OptimizationConfig controls optional preprocessing steps executed before archiving. type OptimizationConfig struct { EnableChunking bool @@ -220,9 +203,6 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c } return nil } - if d.Type()&os.ModeSymlink != 0 { - return nil - } if strings.HasPrefix(path, chunkDir) { return nil } @@ -240,61 +220,15 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c return nil } destBase := filepath.Join(chunkDir, rel) - - meta := chunkedFileMetadata{ - Version: 1, - ChunkSizeBytes: chunkSize, - Mode: uint32(info.Mode()), - UID: -1, - GID: -1, - ModTimeUnixNano: info.ModTime().UnixNano(), - } - if stat, ok := info.Sys().(*syscall.Stat_t); ok && stat != nil { - meta.UID = int(stat.Uid) - meta.GID = int(stat.Gid) - } - - result, err := splitFile(path, destBase, chunkSize) - if err != nil { + if err := splitFile(path, destBase, chunkSize); err != nil { logger.Warning("Failed to chunk %s: %v", path, err) return nil } - meta.SizeBytes = result.SizeBytes - meta.ChunkCount = result.ChunkCount - meta.SHA256 = result.SHA256 - - cleanupChunks := func() { - chunks, err := discoverChunks(destBase) - if err != nil { - return - } - for _, c := range chunks { - _ = os.Remove(c.Path) - } - } - - markerPath := path + ".chunked" - payload, err := json.Marshal(meta) - if err != nil { - logger.Warning("Failed to encode chunk metadata for %s: %v", path, err) - cleanupChunks() - return nil - } - if err := os.WriteFile(markerPath, append(payload, '\n'), defaultChunkFilePerm); err != nil { - logger.Warning("Failed to write chunk marker for %s: %v", path, err) - _ = os.Remove(markerPath) - cleanupChunks() - return nil - } - // Best-effort: preserve the original file's mtime on the marker too. 
- mt := time.Unix(0, meta.ModTimeUnixNano) - _ = os.Chtimes(markerPath, mt, mt) if err := os.Remove(path); err != nil { logger.Warning("Failed to remove original file %s after chunking: %v", path, err) - _ = os.Remove(markerPath) - cleanupChunks() - return nil + } else if err := os.WriteFile(path+".chunked", []byte{}, defaultChunkFilePerm); err != nil { + logger.Warning("Failed to write chunk marker for %s: %v", path, err) } processed++ logger.Debug("Chunked %s into %s", path, destBase) @@ -309,114 +243,64 @@ func chunkLargeFiles(ctx context.Context, logger *logging.Logger, root string, c return nil } -type splitFileResult struct { - ChunkCount int - SizeBytes int64 - SHA256 string -} - -func splitFile(path, destBase string, chunkSize int64) (splitFileResult, error) { +func splitFile(path, destBase string, chunkSize int64) error { if err := os.MkdirAll(filepath.Dir(destBase), defaultChunkDirPerm); err != nil { - return splitFileResult{}, err + return err } in, err := os.Open(path) if err != nil { - return splitFileResult{}, err + return err } defer in.Close() buf := make([]byte, chunkBufferSize) - hasher := sha256.New() - var createdChunks []string - cleanup := func() { - for _, p := range createdChunks { - _ = os.Remove(p) - } - } - chunkCount := 0 - var total int64 + index := 0 for { - chunkPath := fmt.Sprintf("%s.%03d.chunk", destBase, chunkCount+1) - done, n, err := writeChunk(in, chunkPath, buf, chunkSize, hasher) + index++ + chunkPath := fmt.Sprintf("%s.%03d.chunk", destBase, index) + done, err := writeChunk(in, chunkPath, buf, chunkSize) if err != nil { - cleanup() - return splitFileResult{}, err - } - if n > 0 { - createdChunks = append(createdChunks, chunkPath) - total += n - chunkCount++ + return err } if done { break } } - if chunkCount == 0 { - return splitFileResult{}, fmt.Errorf("chunking produced no output for %s", path) - } - return splitFileResult{ - ChunkCount: chunkCount, - SizeBytes: total, - SHA256: fmt.Sprintf("%x", hasher.Sum(nil)), - }, nil + return nil } -func writeChunk(src *os.File, chunkPath string, buf []byte, limit int64, hasher hash.Hash) (bool, int64, error) { +func writeChunk(src *os.File, chunkPath string, buf []byte, limit int64) (bool, error) { out, err := os.OpenFile(chunkPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, defaultChunkFilePerm) if err != nil { - return false, 0, err + return false, err } defer out.Close() var written int64 for written < limit { remaining := limit - written - toRead := buf - if remaining < int64(len(toRead)) { - toRead = toRead[:remaining] + if remaining < int64(len(buf)) { + buf = buf[:remaining] } - n, err := src.Read(toRead) + n, err := src.Read(buf) if n > 0 { - if _, wErr := out.Write(toRead[:n]); wErr != nil { - return false, written, wErr - } - if hasher != nil { - if _, hErr := hasher.Write(toRead[:n]); hErr != nil { - return false, written, hErr - } + if _, wErr := out.Write(buf[:n]); wErr != nil { + return false, wErr } written += int64(n) } if err != nil { if err == io.EOF { - if written == 0 { - _ = out.Close() - _ = os.Remove(chunkPath) - } - return true, written, nil + return true, nil } - return false, written, err + return false, err } if written >= limit { - var probe [1]byte - n, pErr := src.Read(probe[:]) - if n > 0 { - if _, sErr := src.Seek(-int64(n), io.SeekCurrent); sErr != nil { - return false, written, fmt.Errorf("seek after probe: %w", sErr) - } - return false, written, nil - } - if pErr == io.EOF { - return true, written, nil - } - if pErr != nil { - return false, written, pErr - } - return false, 
written, fmt.Errorf("unexpected empty read while probing for EOF") + return false, nil } } - return false, written, nil + return false, nil } func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, maxSize int64) error { @@ -527,37 +411,8 @@ func normalizeTextFile(path string) (bool, error) { func normalizeConfigFile(path string) (bool, error) { // Config files can be whitespace/ordering-sensitive (e.g. section headers). - // Only perform safe, semantic-preserving normalization: - // 1. Strip UTF-8 BOM - // 2. Normalize CRLF → LF, stray CR → LF - // 3. Strip trailing whitespace from each line - // 4. Consolidate trailing newlines to exactly one - // Line order, leading indentation, comments, and blank lines between - // sections are preserved. - data, err := os.ReadFile(path) - if err != nil { - return false, err - } - if len(data) == 0 { - return false, nil - } - - normalized := bytes.TrimPrefix(data, []byte("\xef\xbb\xbf")) - normalized = bytes.ReplaceAll(normalized, []byte("\r\n"), []byte("\n")) - normalized = bytes.ReplaceAll(normalized, []byte("\r"), []byte("\n")) - - lines := bytes.Split(normalized, []byte("\n")) - for i, line := range lines { - lines[i] = bytes.TrimRight(line, " \t") - } - normalized = bytes.Join(lines, []byte("\n")) - - normalized = append(bytes.TrimRight(normalized, "\n"), '\n') - - if bytes.Equal(data, normalized) { - return false, nil - } - return true, os.WriteFile(path, normalized, defaultChunkFilePerm) + // Only perform safe, semantic-preserving normalization here. + return normalizeTextFile(path) } func minifyJSON(path string) (bool, error) { @@ -565,356 +420,16 @@ func minifyJSON(path string) (bool, error) { if err != nil { return false, err } - var buf bytes.Buffer - if err := json.Compact(&buf, data); err != nil { + var tmp any + if err := json.Unmarshal(data, &tmp); err != nil { return false, err } - compacted := buf.Bytes() - if bytes.Equal(data, compacted) { - return false, nil - } - return true, os.WriteFile(path, compacted, defaultChunkFilePerm) -} - -// ReassembleChunkedFiles locates .chunked marker files under root, -// concatenates the matching .NNN.chunk fragments from the chunked_files -// directory, writes the reassembled file, and cleans up markers and chunks. 
-func ReassembleChunkedFiles(logger *logging.Logger, root string) error { - chunkDir := filepath.Join(root, "chunked_files") - if _, err := os.Stat(chunkDir); os.IsNotExist(err) { - return nil - } - - var markers []string - err := filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - if d.IsDir() { - if path == chunkDir { - return filepath.SkipDir - } - return nil - } - if strings.HasSuffix(path, ".chunked") { - markers = append(markers, path) - } - return nil - }) + minified, err := json.Marshal(tmp) if err != nil { - return fmt.Errorf("walk for chunk markers: %w", err) - } - - if len(markers) == 0 { - return nil - } - - var reassembled int - var incomplete bool - - for _, marker := range markers { - originalPath := strings.TrimSuffix(marker, ".chunked") - rel, err := filepath.Rel(root, originalPath) - if err != nil { - logger.Warning("Failed to compute rel path for %s: %v", originalPath, err) - incomplete = true - continue - } - chunkBase := filepath.Join(chunkDir, rel) - - chunks, err := discoverChunks(chunkBase) - if err != nil || len(chunks) == 0 { - logger.Warning("No chunks found for %s (base=%s): %v", rel, chunkBase, err) - incomplete = true - continue - } - - meta, err := readChunkedFileMetadata(marker) - if err != nil { - logger.Warning("Chunk marker metadata unreadable for %s: %v", rel, err) - incomplete = true - continue - } - - ambiguous, err := validateChunkSet(meta, chunks) - if err != nil { - logger.Warning("Chunk set incomplete for %s: %v", rel, err) - incomplete = true - continue - } - if meta == nil && ambiguous { - logger.Warning("Legacy chunk marker without metadata for %s; reassembly cannot fully verify completeness", rel) - } - - if err := concatenateChunks(originalPath, chunks, meta); err != nil { - logger.Warning("Failed to reassemble %s: %v", rel, err) - incomplete = true - continue - } - if meta != nil { - applyChunkedMetadata(logger, originalPath, meta) - } - - _ = os.Remove(marker) - for _, c := range chunks { - _ = os.Remove(c.Path) - } - logger.Debug("Reassembled %s from %d chunks", rel, len(chunks)) - reassembled++ - } - - // Remove chunked_files dir tree if now empty. - if reassembled > 0 && !incomplete { - removeEmptyDirs(chunkDir) - _ = os.Remove(chunkDir) - } - - return nil -} - -type chunkInfo struct { - Index int - Path string -} - -// discoverChunks returns the numerically-sorted list of chunks for a base. -// Chunks are named ..chunk where is a positive integer. -func discoverChunks(base string) ([]chunkInfo, error) { - dir := filepath.Dir(base) - prefix := filepath.Base(base) + "." 
- - entries, err := os.ReadDir(dir) - if err != nil { - return nil, err - } - - var chunks []chunkInfo - for _, e := range entries { - name := e.Name() - if !strings.HasPrefix(name, prefix) || !strings.HasSuffix(name, ".chunk") { - continue - } - idxStr := strings.TrimSuffix(strings.TrimPrefix(name, prefix), ".chunk") - idx, err := strconv.Atoi(idxStr) - if err != nil || idx <= 0 { - continue - } - chunks = append(chunks, chunkInfo{Index: idx, Path: filepath.Join(dir, name)}) - } - - sort.Slice(chunks, func(i, j int) bool { return chunks[i].Index < chunks[j].Index }) - return chunks, nil -} - -func concatenateChunks(dest string, chunks []chunkInfo, meta *chunkedFileMetadata) error { - if err := os.MkdirAll(filepath.Dir(dest), defaultChunkDirPerm); err != nil { - return err - } - - tmpDir := filepath.Dir(dest) - tmp, err := os.CreateTemp(tmpDir, "."+filepath.Base(dest)+".reassemble-*.tmp") - if err != nil { - return err - } - tmpPath := tmp.Name() - defer func() { - if tmpPath != "" { - _ = os.Remove(tmpPath) - } - }() - - if err := os.Chmod(tmpPath, defaultChunkFilePerm); err != nil { - tmp.Close() - return err - } - - buf := make([]byte, chunkBufferSize) - - var hasher hash.Hash - if meta != nil && meta.SHA256 != "" { - hasher = sha256.New() - } - - var written int64 - for _, chunk := range chunks { - in, err := os.Open(chunk.Path) - if err != nil { - tmp.Close() - return err - } - var dst io.Writer = tmp - if hasher != nil { - dst = io.MultiWriter(tmp, hasher) - } - n, err := io.CopyBuffer(dst, in, buf) - if cErr := in.Close(); cErr != nil && err == nil { - err = cErr - } - if err != nil { - tmp.Close() - return err - } - written += n - } - - if err := tmp.Close(); err != nil { - return err - } - - if meta != nil { - if meta.SizeBytes > 0 && written != meta.SizeBytes { - return fmt.Errorf("size mismatch after reassembly: got %d bytes, expected %d", written, meta.SizeBytes) - } - if hasher != nil { - got := fmt.Sprintf("%x", hasher.Sum(nil)) - if got != meta.SHA256 { - return fmt.Errorf("sha256 mismatch after reassembly") - } - } - } - - if err := os.Rename(tmpPath, dest); err != nil { - return err - } - tmpPath = "" - return nil -} - -func removeEmptyDirs(root string) { - var dirs []string - filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { - if err != nil { - return nil - } - if d.IsDir() && path != root { - dirs = append(dirs, path) - } - return nil - }) - for i := len(dirs) - 1; i >= 0; i-- { - os.Remove(dirs[i]) - } -} - -func readChunkedFileMetadata(markerPath string) (*chunkedFileMetadata, error) { - data, err := os.ReadFile(markerPath) - if err != nil { - return nil, err - } - data = bytes.TrimSpace(data) - if len(data) == 0 { - return nil, nil - } - - var meta chunkedFileMetadata - if err := json.Unmarshal(data, &meta); err != nil { - return nil, err - } - if meta.Version != 1 { - return nil, fmt.Errorf("unsupported chunk metadata version %d", meta.Version) - } - if meta.ChunkCount <= 0 || meta.ChunkSizeBytes <= 0 || meta.SizeBytes <= 0 { - return nil, fmt.Errorf("invalid chunk metadata (count=%d chunkSize=%d size=%d)", meta.ChunkCount, meta.ChunkSizeBytes, meta.SizeBytes) - } - return &meta, nil -} - -func validateChunkSet(meta *chunkedFileMetadata, chunks []chunkInfo) (bool, error) { - if len(chunks) == 0 { - return false, fmt.Errorf("no chunk files present") - } - - for i, c := range chunks { - want := i + 1 - if c.Index != want { - return false, fmt.Errorf("missing or out-of-order chunk: expected index %d, got %d", want, c.Index) - } - } - - if meta 
== nil { - // Legacy (empty marker): best-effort structural validation. - var chunkSize int64 - sizes := make([]int64, len(chunks)) - for i, c := range chunks { - info, err := os.Stat(c.Path) - if err != nil { - return false, fmt.Errorf("stat chunk %s: %w", c.Path, err) - } - if !info.Mode().IsRegular() { - return false, fmt.Errorf("chunk is not a regular file: %s", c.Path) - } - sizes[i] = info.Size() - if sizes[i] > chunkSize { - chunkSize = sizes[i] - } - } - if chunkSize <= 0 { - return false, fmt.Errorf("invalid chunk size inferred") - } - for i := 0; i < len(sizes)-1; i++ { - if sizes[i] != chunkSize { - return false, fmt.Errorf("chunk size mismatch for index %d: got %d, expected %d", i+1, sizes[i], chunkSize) - } - } - last := sizes[len(sizes)-1] - if last <= 0 || last > chunkSize { - return false, fmt.Errorf("last chunk size invalid: %d (chunkSize=%d)", last, chunkSize) - } - return last == chunkSize, nil - } - - if meta.ChunkCount != len(chunks) { - return false, fmt.Errorf("chunk count mismatch: expected %d, found %d", meta.ChunkCount, len(chunks)) - } - - for i, c := range chunks { - info, err := os.Stat(c.Path) - if err != nil { - return false, fmt.Errorf("stat chunk %s: %w", c.Path, err) - } - if !info.Mode().IsRegular() { - return false, fmt.Errorf("chunk is not a regular file: %s", c.Path) - } - expected := meta.ChunkSizeBytes - if i == meta.ChunkCount-1 { - expected = meta.SizeBytes - meta.ChunkSizeBytes*int64(meta.ChunkCount-1) - } - if expected <= 0 { - return false, fmt.Errorf("invalid expected chunk size for index %d", i+1) - } - if info.Size() != expected { - return false, fmt.Errorf("chunk size mismatch for index %d: got %d, expected %d", i+1, info.Size(), expected) - } - } - - return false, nil -} - -func applyChunkedMetadata(logger *logging.Logger, destPath string, meta *chunkedFileMetadata) { - if meta == nil { - return - } - - if meta.UID >= 0 || meta.GID >= 0 { - uid := meta.UID - gid := meta.GID - if uid < 0 { - uid = -1 - } - if gid < 0 { - gid = -1 - } - if err := os.Chown(destPath, uid, gid); err != nil { - logger.Debug("Failed to chown reassembled file %s: %v", destPath, err) - } - } - - if err := os.Chmod(destPath, os.FileMode(meta.Mode)); err != nil { - logger.Debug("Failed to chmod reassembled file %s: %v", destPath, err) + return false, err } - - mt := time.Unix(0, meta.ModTimeUnixNano) - if err := os.Chtimes(destPath, mt, mt); err != nil { - logger.Debug("Failed to set timestamps on reassembled file %s: %v", destPath, err) + if bytes.Equal(bytes.TrimSpace(data), minified) { + return false, nil } + return true, os.WriteFile(path, minified, defaultChunkFilePerm) } diff --git a/internal/backup/optimizations_helpers_test.go b/internal/backup/optimizations_helpers_test.go index 3835e9c..7c6fffa 100644 --- a/internal/backup/optimizations_helpers_test.go +++ b/internal/backup/optimizations_helpers_test.go @@ -2,15 +2,11 @@ package backup import ( "bytes" - "context" "encoding/json" "os" "path/filepath" "strings" "testing" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" ) func TestSplitFileAndChunks(t *testing.T) { @@ -22,16 +18,9 @@ func TestSplitFileAndChunks(t *testing.T) { } destBase := filepath.Join(tmp, "chunks", "data.bin") - res, err := splitFile(source, destBase, 16) - if err != nil { + if err := splitFile(source, destBase, 16); err != nil { t.Fatalf("splitFile: %v", err) } - if res.ChunkCount != 3 { - t.Fatalf("chunk count %d, want 3", res.ChunkCount) - } - if res.SizeBytes != int64(len(content)) { - 
t.Fatalf("split size %d, want %d", res.SizeBytes, len(content)) - } chunks := []string{ destBase + ".001.chunk", @@ -130,231 +119,3 @@ func TestMinifyJSONKeepsData(t *testing.T) { t.Fatalf("unexpected decoded content: %+v", decoded) } } - -// TestMinifyJSONPreservesLargeIntegers verifies that json.Compact preserves -// numeric values that exceed float64 precision (integers > 2^53). -func TestMinifyJSONPreservesLargeIntegers(t *testing.T) { - tmp := t.TempDir() - path := filepath.Join(tmp, "data.json") - // 9007199254740993 is 2^53 + 1, which loses precision under float64. - input := `{"id": 9007199254740993, "name": "test"}` - if err := os.WriteFile(path, []byte(input), 0o640); err != nil { - t.Fatalf("write: %v", err) - } - if _, err := minifyJSON(path); err != nil { - t.Fatalf("minifyJSON: %v", err) - } - got, _ := os.ReadFile(path) - if !bytes.Contains(got, []byte("9007199254740993")) { - t.Fatalf("large integer lost precision: got %q", got) - } -} - -// TestMinifyJSONPreservesKeyOrder verifies that json.Compact does not -// reorder object keys (unlike json.Marshal on map[string]any). -func TestMinifyJSONPreservesKeyOrder(t *testing.T) { - tmp := t.TempDir() - path := filepath.Join(tmp, "data.json") - // Keys deliberately in reverse alphabetical order. - input := "{\n \"z\": 1,\n \"a\": 2\n}\n" - if err := os.WriteFile(path, []byte(input), 0o640); err != nil { - t.Fatalf("write: %v", err) - } - if _, err := minifyJSON(path); err != nil { - t.Fatalf("minifyJSON: %v", err) - } - got, _ := os.ReadFile(path) - expected := `{"z":1,"a":2}` - if string(got) != expected { - t.Fatalf("key order changed: expected %q, got %q", expected, string(got)) - } -} - -// TestMinifyJSONNoopOnAlreadyCompact verifies no disk write when file is -// already compact. -func TestMinifyJSONNoopOnAlreadyCompact(t *testing.T) { - tmp := t.TempDir() - path := filepath.Join(tmp, "data.json") - compact := `{"a":1,"b":2}` - if err := os.WriteFile(path, []byte(compact), 0o640); err != nil { - t.Fatalf("write: %v", err) - } - info1, _ := os.Stat(path) - if _, err := minifyJSON(path); err != nil { - t.Fatalf("minifyJSON: %v", err) - } - info2, _ := os.Stat(path) - if !info1.ModTime().Equal(info2.ModTime()) { - t.Fatalf("file was rewritten even though already compact") - } -} - -// TestReassembleChunkedFilesRoundTrip verifies that chunk + reassemble is a -// lossless round-trip: the reassembled file is byte-identical to the original. -func TestReassembleChunkedFilesRoundTrip(t *testing.T) { - root := t.TempDir() - - // Create a file that will be chunked (96 bytes, threshold 64, chunk size 16). - original := bytes.Repeat([]byte("ABCDEFGHIJKLMNOP"), 6) // 96 bytes - bigFile := filepath.Join(root, "subdir", "large.bin") - if err := os.MkdirAll(filepath.Dir(bigFile), 0o755); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(bigFile, original, 0o640); err != nil { - t.Fatal(err) - } - - logger := logging.New(types.LogLevelError, false) - cfg := OptimizationConfig{ - EnableChunking: true, - ChunkSizeBytes: 16, - ChunkThresholdBytes: 64, - } - - // Apply optimizations (only chunking enabled). - if err := ApplyOptimizations(context.Background(), logger, root, cfg); err != nil { - t.Fatalf("ApplyOptimizations: %v", err) - } - - // Verify the original is gone and the marker exists. 
- if _, err := os.Stat(bigFile); !os.IsNotExist(err) { - t.Fatalf("expected original removed, stat err=%v", err) - } - if _, err := os.Stat(bigFile + ".chunked"); err != nil { - t.Fatalf("chunk marker missing: %v", err) - } - // Regression: if file size is an exact multiple of chunk size, we must not - // create an extra empty chunk. - if _, err := os.Stat(filepath.Join(root, "chunked_files", "subdir", "large.bin.006.chunk")); err != nil { - t.Fatalf("expected last chunk to exist: %v", err) - } - if _, err := os.Stat(filepath.Join(root, "chunked_files", "subdir", "large.bin.007.chunk")); !os.IsNotExist(err) { - t.Fatalf("expected no extra empty chunk, stat err=%v", err) - } - - // Reassemble. - if err := ReassembleChunkedFiles(logger, root); err != nil { - t.Fatalf("ReassembleChunkedFiles: %v", err) - } - - // Verify byte-identical round-trip. - reassembled, err := os.ReadFile(bigFile) - if err != nil { - t.Fatalf("read reassembled: %v", err) - } - if !bytes.Equal(reassembled, original) { - t.Fatalf("reassembled content differs: got %d bytes, want %d bytes", len(reassembled), len(original)) - } - - // Verify cleanup: marker removed, chunked_files dir removed. - if _, err := os.Stat(bigFile + ".chunked"); !os.IsNotExist(err) { - t.Fatalf("chunk marker should be removed, stat err=%v", err) - } - if _, err := os.Stat(filepath.Join(root, "chunked_files")); !os.IsNotExist(err) { - t.Fatalf("chunked_files dir should be removed, stat err=%v", err) - } -} - -// TestReassembleNoopWithoutChunks verifies ReassembleChunkedFiles is a no-op -// when the directory contains no chunked files. -func TestReassembleNoopWithoutChunks(t *testing.T) { - root := t.TempDir() - filePath := filepath.Join(root, "normal.txt") - if err := os.WriteFile(filePath, []byte("hello"), 0o640); err != nil { - t.Fatal(err) - } - logger := logging.New(types.LogLevelError, false) - if err := ReassembleChunkedFiles(logger, root); err != nil { - t.Fatalf("ReassembleChunkedFiles on clean dir: %v", err) - } - got, _ := os.ReadFile(filePath) - if string(got) != "hello" { - t.Fatalf("file modified unexpectedly: %q", got) - } -} - -// TestNormalizeConfigFileSafeOperations verifies each of the four safe -// operations performed by normalizeConfigFile individually and combined. 
-func TestNormalizeConfigFileSafeOperations(t *testing.T) { - tests := []struct { - name string - input string - expected string - }{ - { - name: "BOM removal", - input: "\xef\xbb\xbf[section]\n\tkey = value\n", - expected: "[section]\n\tkey = value\n", - }, - { - name: "trailing whitespace per line", - input: "datastore: Data1\n\tpath /mnt/data \n\tgc-schedule 05:00\t\n", - expected: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", - }, - { - name: "trailing newlines consolidated", - input: "[section]\n\tkey = value\n\n\n\n", - expected: "[section]\n\tkey = value\n", - }, - { - name: "CRLF normalized to LF", - input: "datastore: X\r\n\tpath /tmp\r\n", - expected: "datastore: X\n\tpath /tmp\n", - }, - { - name: "stray CR normalized to LF", - input: "line1\rline2\n", - expected: "line1\nline2\n", - }, - { - name: "all operations combined", - input: "\xef\xbb\xbfdatastore: D1\r\n\tpath /mnt/d1 \r\n\tgc 05:00\t\r\n\n\n", - expected: "datastore: D1\n\tpath /mnt/d1\n\tgc 05:00\n", - }, - { - name: "clean file unchanged", - input: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", - expected: "datastore: Data1\n\tpath /mnt/data\n\tgc-schedule 05:00\n", - }, - { - name: "preserves leading indentation", - input: "\t\tdeep indent\n\t\t\tdeeper\n", - expected: "\t\tdeep indent\n\t\t\tdeeper\n", - }, - { - name: "preserves blank lines between sections", - input: "datastore: A\n\tpath /a\n\ndatastore: B\n\tpath /b\n", - expected: "datastore: A\n\tpath /a\n\ndatastore: B\n\tpath /b\n", - }, - { - name: "preserves comments", - input: "# main config\n; alt comment\nkey = value\n", - expected: "# main config\n; alt comment\nkey = value\n", - }, - { - name: "empty file stays empty", - input: "", - expected: "", - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tmp := t.TempDir() - path := filepath.Join(tmp, "test.cfg") - if err := os.WriteFile(path, []byte(tc.input), 0o640); err != nil { - t.Fatalf("write: %v", err) - } - if _, err := normalizeConfigFile(path); err != nil { - t.Fatalf("normalizeConfigFile: %v", err) - } - got, err := os.ReadFile(path) - if err != nil { - t.Fatalf("read: %v", err) - } - if string(got) != tc.expected { - t.Fatalf("mismatch\ninput: %q\nexpected: %q\ngot: %q", tc.input, tc.expected, string(got)) - } - }) - } -} diff --git a/internal/checks/checks.go b/internal/checks/checks.go index aee6920..1a0a59e 100644 --- a/internal/checks/checks.go +++ b/internal/checks/checks.go @@ -13,6 +13,7 @@ import ( "time" "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/safefs" ) // createTestFile is a small indirection over os.Create used by permission @@ -28,7 +29,7 @@ var ( osWriteFile = os.WriteFile osSymlink = os.Symlink syncFile = func(f *os.File) error { return f.Sync() } - killFunc = func(pid int, sig syscall.Signal) error { return syscall.Kill(pid, sig) } + killFunc = syscall.Kill // tempRootPath is the runtime path used by CheckTempDirectory. // It is a variable to allow tests to use a safe, isolated temporary directory. 
@@ -64,6 +65,7 @@ type CheckerConfig struct { MinDiskSecondaryGB float64 MinDiskCloudGB float64 SafetyFactor float64 // Multiplier for estimated size (e.g., 1.5 = 50% buffer) + FsIoTimeout time.Duration LockDirPath string LockFilePath string MaxLockAge time.Duration @@ -94,6 +96,9 @@ func (c *CheckerConfig) Validate() error { if c.SafetyFactor < 1.0 { return fmt.Errorf("safety factor must be >= 1.0, got %.2f", c.SafetyFactor) } + if c.FsIoTimeout < 0 { + return fmt.Errorf("filesystem I/O timeout must be >= 0") + } if c.MaxLockAge <= 0 { return fmt.Errorf("max lock age must be positive") } @@ -341,20 +346,18 @@ func (c *Checker) CheckLockFile() CheckResult { return result } } - } else { - // No usable PID/host metadata; fall back to age-based stale detection. - if age > c.config.MaxLockAge { - c.logger.Warning("Removing stale lock file (age: %v)", age) - if err := osRemove(lockPath); err != nil { - result.Error = fmt.Errorf("failed to remove stale lock: %w", err) - result.Message = result.Error.Error() - return result - } - } else { - result.Message = formatInProgress(age, meta) - c.logger.Error("%s", result.Message) + } else if age > c.config.MaxLockAge { + // Stale lock file, remove it + c.logger.Warning("Removing stale lock file (age: %v)", age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() return result } + } else { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) + return result } } @@ -655,6 +658,7 @@ func GetDefaultCheckerConfig(backupPath, logPath, lockDir string) *CheckerConfig MinDiskSecondaryGB: 10.0, MinDiskCloudGB: 10.0, SafetyFactor: 1.5, // 50% buffer over estimated size + FsIoTimeout: 30 * time.Second, LockDirPath: lockDir, LockFilePath: filepath.Join(lockDir, ".backup.lock"), MaxLockAge: 2 * time.Hour, @@ -690,7 +694,7 @@ func (c *Checker) CheckDiskSpaceForEstimate(estimatedSizeGB float64) CheckResult } requiredGB := math.Max(entry.min, estimatedSizeGB*c.config.SafetyFactor) - availableGB, err := diskSpaceGB(entry.path) + availableGB, err := c.diskSpaceGB(entry.path) if err != nil { errMsg := fmt.Sprintf("%s disk space check failed (%s): %v", entry.label, entry.path, err) wrappedErr := fmt.Errorf("%s disk space check failed (%s): %w", entry.label, entry.path, err) @@ -733,7 +737,7 @@ func (c *Checker) CheckDiskSpaceForEstimate(estimatedSizeGB float64) CheckResult } func (c *Checker) checkSingleDisk(label, path string, minGB float64) error { - availableGB, err := diskSpaceGB(path) + availableGB, err := c.diskSpaceGB(path) if err != nil { return fmt.Errorf("%s disk space check failed (%s): %w", label, path, err) } @@ -745,10 +749,14 @@ func (c *Checker) checkSingleDisk(label, path string, minGB float64) error { return nil } -func diskSpaceGB(path string) (float64, error) { - var stat syscall.Statfs_t - if err := syscall.Statfs(path, &stat); err != nil { +func (c *Checker) diskSpaceGB(path string) (float64, error) { + timeout := time.Duration(0) + if c != nil && c.config != nil { + timeout = c.config.FsIoTimeout + } + stat, err := safefs.Statfs(context.Background(), path, timeout) + if err != nil { return 0, err } - return float64(stat.Bavail*uint64(stat.Bsize)) / (1024 * 1024 * 1024), nil + return float64(stat.Bavail) * float64(stat.Bsize) / (1024 * 1024 * 1024), nil } diff --git a/internal/checks/checks_test.go b/internal/checks/checks_test.go index bc1e4e1..1dbc6ff 100644 --- a/internal/checks/checks_test.go +++ 
b/internal/checks/checks_test.go @@ -155,46 +155,40 @@ func TestCheckLockFile_RemovesLockWhenProcessIsGone(t *testing.T) { tmpDir := t.TempDir() lockPath := filepath.Join(tmpDir, ".backup.lock") - host, _ := os.Hostname() - // Create a "fresh" lock file that references a non-existent PID. - content := fmt.Sprintf("pid=99999\nhost=%s\ntime=%s\n", host, time.Now().Format(time.RFC3339)) - if err := os.WriteFile(lockPath, []byte(content), 0644); err != nil { - t.Fatalf("Failed to create test lock file: %v", err) - } - now := time.Now() - if err := os.Chtimes(lockPath, now, now); err != nil { - t.Fatalf("Failed to set lock file time: %v", err) + hostname, _ := os.Hostname() + lockContent := fmt.Sprintf("pid=%d\nhost=%s\ntime=%s\n", 999999, hostname, time.Now().Format(time.RFC3339)) + if err := os.WriteFile(lockPath, []byte(lockContent), 0o640); err != nil { + t.Fatalf("write lock file: %v", err) } - oldKill := killFunc + config := GetDefaultCheckerConfig(tmpDir, tmpDir, tmpDir) + config.LockFilePath = lockPath + config.MaxLockAge = 24 * time.Hour + config.DryRun = false + + origKill := killFunc + t.Cleanup(func() { killFunc = origKill }) killFunc = func(pid int, sig syscall.Signal) error { - return syscall.ESRCH + if pid == 999999 && sig == 0 { + return syscall.ESRCH + } + return origKill(pid, sig) } - t.Cleanup(func() { killFunc = oldKill }) - config := &CheckerConfig{ - BackupPath: tmpDir, - LogPath: tmpDir, - LockDirPath: tmpDir, - LockFilePath: lockPath, - MaxLockAge: 1 * time.Hour, - DryRun: false, - } checker := NewChecker(logger, config) - result := checker.CheckLockFile() if !result.Passed { t.Fatalf("CheckLockFile should succeed after removing stale lock: %s", result.Message) } t.Cleanup(func() { _ = checker.ReleaseLock() }) - data, err := os.ReadFile(lockPath) + content, err := os.ReadFile(lockPath) if err != nil { t.Fatalf("read lock file: %v", err) } - if !strings.Contains(string(data), fmt.Sprintf("pid=%d\n", os.Getpid())) { - t.Fatalf("expected new lock file to contain current pid, got: %q", string(data)) + if !strings.Contains(string(content), fmt.Sprintf("pid=%d\n", os.Getpid())) { + t.Fatalf("lock file not recreated with current pid; got:\n%s", string(content)) } } @@ -1642,7 +1636,13 @@ func TestCheckDiskSpaceForEstimate_WarnsOnNonCriticalErrorsAndInsufficientSpace( } func TestDiskSpaceGB_ErrorsOnMissingPath(t *testing.T) { - if _, err := diskSpaceGB("/nonexistent/path"); err == nil { + logger := logging.New(types.LogLevelInfo, false) + logger.SetOutput(io.Discard) + + cfg := GetDefaultCheckerConfig(t.TempDir(), t.TempDir(), t.TempDir()) + checker := NewChecker(logger, cfg) + + if _, err := checker.diskSpaceGB("/nonexistent/path"); err == nil { t.Fatalf("expected diskSpaceGB to error on missing path") } } diff --git a/internal/config/config.go b/internal/config/config.go index a3217b9..0fa10a5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -197,7 +197,8 @@ type Config struct { PVEBackupIncludePattern string BackupCephConfig bool CephConfigPath string - PveshTimeoutSeconds int // Timeout for pvesh commands (seconds). 0 disables the timeout. 
+	PveshTimeoutSeconds int
+	FsIoTimeoutSeconds  int
 
 	// PBS-specific collection options
 	BackupDatastoreConfigs bool
@@ -659,6 +660,10 @@ func (c *Config) parsePVESettings() error {
 	if c.PveshTimeoutSeconds < 0 {
 		c.PveshTimeoutSeconds = 15
 	}
+	c.FsIoTimeoutSeconds = c.getInt("FS_IO_TIMEOUT", 30)
+	if c.FsIoTimeoutSeconds < 0 {
+		c.FsIoTimeoutSeconds = 30
+	}
 	c.BackupSmallPVEBackups = c.getBool("BACKUP_SMALL_PVE_BACKUPS", false)
 	if rawSize := strings.TrimSpace(c.getString("MAX_PVE_BACKUP_SIZE", "")); rawSize != "" {
 		sizeBytes, err := parseSizeToBytes(rawSize)
diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env
index c9f5b63..4ec1c09 100644
--- a/internal/config/templates/backup.env
+++ b/internal/config/templates/backup.env
@@ -280,8 +280,9 @@ BACKUP_PVE_ACL=true # Access control (users/roles/groups/ACL; rea
 BACKUP_PVE_JOBS=true
 BACKUP_PVE_SCHEDULES=true
 BACKUP_PVE_REPLICATION=true
-PVESH_TIMEOUT=15 # seconds (0 disables). Timeout for pvesh calls during PVE collection.
 BACKUP_PVE_BACKUP_FILES=true
+PVESH_TIMEOUT=15 # Timeout (sec) per individual pvesh command (0 = disabled)
+FS_IO_TIMEOUT=30 # Timeout (sec) for filesystem I/O on storage (stat/readdir/statfs). Useful for unreachable network mounts (0 = disabled)
 BACKUP_SMALL_PVE_BACKUPS=false
 MAX_PVE_BACKUP_SIZE=100M
 PVE_BACKUP_INCLUDE_PATTERN=
diff --git a/internal/orchestrator/additional_helpers_test.go b/internal/orchestrator/additional_helpers_test.go
index df1623b..8b20d73 100644
--- a/internal/orchestrator/additional_helpers_test.go
+++ b/internal/orchestrator/additional_helpers_test.go
@@ -1231,22 +1231,6 @@ func TestShowCategorySelectionMenu(t *testing.T) {
 		t.Fatalf("expected 2 categories after toggle, got %d", len(cats))
 	}
 
-	// Toggle category 1 twice, attempt continue with none selected, then select 1 and continue
-	r, w, _ = os.Pipe()
-	_, _ = w.WriteString("1\n1\nc\n1\nc\n")
-	_ = w.Close()
-	os.Stdin = r
-	cats, err = ShowCategorySelectionMenu(context.Background(), logger, available, SystemTypePVE)
-	if err != nil {
-		t.Fatalf("ShowCategorySelectionMenu deselect-all error: %v", err)
-	}
-	if len(cats) != 1 {
-		t.Fatalf("expected 1 category after re-select, got %d", len(cats))
-	}
-	if cats[0].ID != "pve_cluster" {
-		t.Fatalf("expected pve_cluster selected, got %q", cats[0].ID)
-	}
-
 	// Cancel
 	r, w, _ = os.Pipe()
 	_, _ = w.WriteString("0\n")
diff --git a/internal/orchestrator/backup_safety.go b/internal/orchestrator/backup_safety.go
index af3c982..5402897 100644
--- a/internal/orchestrator/backup_safety.go
+++ b/internal/orchestrator/backup_safety.go
@@ -90,7 +90,7 @@ func createSafetyBackup(logger *logging.Logger, selectedCategories []Category, d
 	logger.Info("Creating %s of current configuration...", strings.ToLower(desc))
 	logger.Debug("%s will be saved to: %s", desc, backupArchive)
 
-	file, err := safetyFS.OpenFile(backupArchive, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
+	file, err := safetyFS.Create(backupArchive)
 	if err != nil {
 		return nil, fmt.Errorf("create backup archive: %w", err)
 	}
@@ -186,7 +186,7 @@ func createSafetyBackup(logger *logging.Logger, selectedCategories []Category, d
 
 	if spec.WriteLocationFile && locationFileName != "" {
 		locationFile := filepath.Join(baseDir, locationFileName)
-		if err := safetyFS.WriteFile(locationFile, []byte(backupArchive), 0o600); err != nil {
+		if err := safetyFS.WriteFile(locationFile, []byte(backupArchive), 0644); err != nil {
 			logger.Warning("Could not write backup location file: %v", err)
 		} else {
 			logger.Info("Backup location saved to: %s", 
locationFile) diff --git a/internal/orchestrator/compatibility.go b/internal/orchestrator/compatibility.go index 433079b..cc2eb3f 100644 --- a/internal/orchestrator/compatibility.go +++ b/internal/orchestrator/compatibility.go @@ -70,16 +70,14 @@ func ValidateCompatibility(manifest *backup.Manifest) error { currentSystem := DetectCurrentSystem() backupType := DetectBackupType(manifest) - // If we can't detect the system types, issue a warning but allow. + // If we can't detect either, issue a warning but allow if currentSystem == SystemTypeUnknown { - if backupType == SystemTypeUnknown { - return fmt.Errorf("warning: cannot detect current system type nor backup type - compatibility cannot be validated") - } - return fmt.Errorf("warning: cannot detect current system type (backup appears to be %s) - restoration may fail", strings.ToUpper(string(backupType))) + return fmt.Errorf("warning: cannot detect current system type - restoration may fail") } if backupType == SystemTypeUnknown { - return fmt.Errorf("warning: cannot detect backup type from manifest (current system is %s) - compatibility cannot be validated", strings.ToUpper(string(currentSystem))) + // If backup type is unknown, we can't validate - issue warning + return nil // Allow but warn in calling code } // Check for incompatibility diff --git a/internal/orchestrator/compatibility_test.go b/internal/orchestrator/compatibility_test.go index f4c356a..5c7ef98 100644 --- a/internal/orchestrator/compatibility_test.go +++ b/internal/orchestrator/compatibility_test.go @@ -2,7 +2,6 @@ package orchestrator import ( "os" - "strings" "testing" "github.com/tis24dev/proxsave/internal/backup" @@ -25,25 +24,6 @@ func TestValidateCompatibility_Mismatch(t *testing.T) { } } -func TestValidateCompatibility_UnknownBackupTypeWarns(t *testing.T) { - orig := compatFS - defer func() { compatFS = orig }() - - fake := NewFakeFS() - defer func() { _ = os.RemoveAll(fake.Root) }() - compatFS = fake - if err := fake.AddDir("/etc/pve"); err != nil { - t.Fatalf("add dir: %v", err) - } - - manifest := &backup.Manifest{} - if err := ValidateCompatibility(manifest); err == nil { - t.Fatalf("expected warning for unknown backup type") - } else if !strings.Contains(err.Error(), "cannot detect backup type") { - t.Fatalf("unexpected warning: %v", err) - } -} - func TestDetectCurrentSystem_Unknown(t *testing.T) { orig := compatFS defer func() { compatFS = orig }() diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index 16a79bf..59be13c 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -3543,12 +3543,7 @@ echo $count > "%s" # First call (archive) succeeds, second call (metadata) fails if [ "$count" -eq 1 ]; then # Create the target file for archive - # rclone copyto [flags...] - target="$3" - if [[ -z "$target" || "$target" == --* ]]; then - echo "invalid rclone dst: $target" >&2 - exit 2 - fi + target="${@: -1}" echo "archive content" > "$target" exit 0 else diff --git a/internal/orchestrator/deps_test.go b/internal/orchestrator/deps_test.go index dc90cc4..ba4d404 100644 --- a/internal/orchestrator/deps_test.go +++ b/internal/orchestrator/deps_test.go @@ -36,12 +36,6 @@ func NewFakeFS() *FakeFS { func (f *FakeFS) onDisk(path string) string { clean := filepath.Clean(path) - // Allow passing already-mapped on-disk paths back into FakeFS methods. 
- // This is important for helpers that return OS paths (e.g., MkdirTemp/CreateTemp) - // which are then reused by other FS operations in tests. - if clean == f.Root || strings.HasPrefix(clean, f.Root+string(filepath.Separator)) { - return clean - } clean = strings.TrimPrefix(clean, string(filepath.Separator)) return filepath.Join(f.Root, clean) } diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index 6a7fa27..dec3303 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -254,20 +254,15 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath } logging.DebugStep(logger, "arm network rollback", "Handle created: marker=%s script=%s log=%s", handle.markerPath, handle.scriptPath, handle.logPath) - logging.DebugStep(logger, "arm network rollback", "Create rollback log: %s", handle.logPath) - if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { - return nil, fmt.Errorf("create rollback log: %w", err) - } - logging.DebugStep(logger, "arm network rollback", "Write rollback marker: %s", handle.markerPath) - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } logging.DebugStep(logger, "arm network rollback", "Marker written successfully") logging.DebugStep(logger, "arm network rollback", "Write rollback script: %s", handle.scriptPath) script := buildRollbackScript(handle.markerPath, backupPath, handle.logPath, true) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } logging.DebugStep(logger, "arm network rollback", "Script written successfully (%d bytes)", len(script)) @@ -598,19 +593,14 @@ func rollbackNetworkFilesNow(ctx context.Context, logger *logging.Logger, backup scriptPath := filepath.Join(baseDir, fmt.Sprintf("network_rollback_now_%s.sh", timestamp)) logPath = filepath.Join(baseDir, fmt.Sprintf("network_rollback_now_%s.log", timestamp)) - logging.DebugStep(logger, "rollback network files", "Create rollback log: %s", logPath) - if err := restoreFS.WriteFile(logPath, []byte(""), 0o600); err != nil { - return "", fmt.Errorf("create rollback log: %w", err) - } - logging.DebugStep(logger, "rollback network files", "Write rollback marker: %s", markerPath) - if err := restoreFS.WriteFile(markerPath, []byte("pending\n"), 0o600); err != nil { + if err := restoreFS.WriteFile(markerPath, []byte("pending\n"), 0o640); err != nil { return "", fmt.Errorf("write rollback marker: %w", err) } logging.DebugStep(logger, "rollback network files", "Write rollback script: %s", scriptPath) script := buildRollbackScript(markerPath, backupPath, logPath, false) - if err := restoreFS.WriteFile(scriptPath, []byte(script), 0o600); err != nil { + if err := restoreFS.WriteFile(scriptPath, []byte(script), 0o640); err != nil { _ = restoreFS.Remove(markerPath) return "", fmt.Errorf("write rollback script: %w", err) } @@ -633,6 +623,7 @@ func rollbackNetworkFilesNow(ctx context.Context, logger *logging.Logger, backup func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworking bool) string { lines := []string{ + "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), 
fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index c953c23..1fa56ae 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1425,17 +1425,6 @@ func (o *Orchestrator) cleanupPreviousExecutionArtifacts() *TempDirRegistry { } } - // Phase 5: Cleanup old restore staging directories under /tmp/proxsave - stageRemoved, stageFailed := cleanupOldRestoreStageDirs(fs, o.logger, o.now(), tempDirCleanupAge) - if stageRemoved > 0 || stageFailed > 0 { - if !cleanupStarted { - o.logger.Debug("Starting cleanup of previous execution files...") - cleanupStarted = true - } - removedDirs += stageRemoved - failedFiles += stageFailed - } - // Final summary - only show if cleanup was actually performed if cleanupStarted { if removedFiles > 0 || removedDirs > 0 { @@ -1511,6 +1500,7 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { cc.BackupCephConfig = cfg.BackupCephConfig cc.CephConfigPath = cfg.CephConfigPath cc.PveshTimeoutSeconds = cfg.PveshTimeoutSeconds + cc.FsIoTimeoutSeconds = cfg.FsIoTimeoutSeconds cc.BackupDatastoreConfigs = cfg.BackupDatastoreConfigs cc.BackupPBSS3Endpoints = cfg.BackupPBSS3Endpoints diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go index 6ee4b5b..be72037 100644 --- a/internal/orchestrator/pbs_staged_apply.go +++ b/internal/orchestrator/pbs_staged_apply.go @@ -8,7 +8,6 @@ import ( "os" "path/filepath" "strings" - "time" "github.com/tis24dev/proxsave/internal/logging" ) @@ -42,9 +41,25 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, } behavior := plan.PBSRestoreBehavior + strict := behavior == PBSRestoreBehaviorClean + allowFileFallback := behavior == PBSRestoreBehaviorClean + + needsAPI := plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs") + apiAvailable := false + if needsAPI { + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if allowFileFallback { + logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", err) + } else { + logger.Warning("PBS API apply unavailable; skipping API-applied PBS categories (merge mode): %v", err) + } + } else { + apiAvailable = true + } + } if plan.HasCategoryID("pbs_host") { - // Restore file-only configs (no stable API coverage yet). + // Always restore file-only configs (no stable API coverage yet). // ACME should be applied before node config (node.cfg references ACME accounts/plugins). 
for _, rel := range []string{ "etc/proxmox-backup/acme/accounts.cfg", @@ -56,212 +71,117 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger, logger.Warning("PBS staged apply: %s: %v", rel, err) } } - } - - if plan.HasCategoryID("pbs_tape") { - if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: tape configs: %v", err) - } - } - - if behavior == PBSRestoreBehaviorUnspecified { - logging.DebugStep(logger, "pbs staged apply", "PBS restore behavior is unspecified; API-backed PBS categories (node/datastores/remotes/jobs/notifications) will be applied in the final staged API phase") - } - - return nil -} - -func maybeApplyPBSConfigsViaAPIFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool, stopServicesAfter bool) (err error) { - if plan == nil || plan.SystemType != SystemTypePBS { - return nil - } - if strings.TrimSpace(stageRoot) == "" { - logging.DebugStep(logger, "pbs staged apply (api)", "Skipped: staging directory not available") - return nil - } - // API-backed PBS categories. - if !plan.HasCategoryID("pbs_host") && - !plan.HasCategoryID("datastore_pbs") && - !plan.HasCategoryID("pbs_remotes") && - !plan.HasCategoryID("pbs_jobs") && - !plan.HasCategoryID("pbs_notifications") { - return nil - } - - done := logging.DebugStart(logger, "pbs staged apply (api)", "dryRun=%v stage=%s", dryRun, stageRoot) - defer func() { done(err) }() - if dryRun { - logger.Info("Dry run enabled: skipping staged PBS API apply") - return nil - } - if !isRealRestoreFS(restoreFS) { - logger.Debug("Skipping staged PBS API apply: non-system filesystem in use") - return nil - } - if os.Geteuid() != 0 { - logger.Warning("Skipping staged PBS API apply: requires root privileges") - return nil - } - - behavior := plan.PBSRestoreBehavior - strict := behavior == PBSRestoreBehaviorClean - allowFileFallback := behavior == PBSRestoreBehaviorClean - - ensureAttempted := false - defer func() { - if !stopServicesAfter || !ensureAttempted { - return - } - stopCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStopTimeout+2*serviceVerifyTimeout+10*time.Second) - defer cancel() - if stopErr := stopPBSServices(stopCtx, logger); stopErr != nil { - logger.Warning("Failed to stop PBS services after staged API apply: %v", stopErr) - } - }() - - ensureAttempted = true - if apiErr := ensurePBSServicesForAPI(ctx, logger); apiErr != nil { - if allowFileFallback { - logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", apiErr) - if plan.HasCategoryID("pbs_host") { - for _, rel := range []string{ - "etc/proxmox-backup/traffic-control.cfg", - "etc/proxmox-backup/node.cfg", - } { - if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { - logger.Warning("PBS staged apply: %s: %v", rel, err) - } + if apiAvailable { + if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: traffic-control failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") } } - if plan.HasCategoryID("datastore_pbs") { - if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: s3.cfg: %v", err) - } - if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); 
err != nil { - logger.Warning("PBS staged apply: datastore.cfg: %v", err) + if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { + logger.Warning("PBS API apply: node config failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based node.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") } } - if plan.HasCategoryID("pbs_remotes") { - if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: remote.cfg: %v", err) + } else if allowFileFallback { + for _, rel := range []string{ + "etc/proxmox-backup/traffic-control.cfg", + "etc/proxmox-backup/node.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) } } - if plan.HasCategoryID("pbs_jobs") { - if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: job configs: %v", err) - } - } - if plan.HasCategoryID("pbs_notifications") { - if err := applyPBSNotificationsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: notifications: %v", err) - } - } - return nil - } - - var skipped []string - if plan.HasCategoryID("pbs_host") { - skipped = append(skipped, "pbs_host (node.cfg/traffic-control.cfg)") - } - if plan.HasCategoryID("datastore_pbs") { - skipped = append(skipped, "datastore_pbs") - } - if plan.HasCategoryID("pbs_remotes") { - skipped = append(skipped, "pbs_remotes") - } - if plan.HasCategoryID("pbs_jobs") { - skipped = append(skipped, "pbs_jobs") - } - if plan.HasCategoryID("pbs_notifications") { - skipped = append(skipped, "pbs_notifications") - } - if len(skipped) == 0 { - skipped = append(skipped, "PBS API categories") + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping node.cfg/traffic-control.cfg: merge mode requires PBS API apply") } - return fmt.Errorf("PBS API apply unavailable in %s; skipped %s: %w", behavior.DisplayName(), strings.Join(skipped, ", "), apiErr) } - if plan.HasCategoryID("pbs_host") { - if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: traffic-control failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") - _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") + if plan.HasCategoryID("datastore_pbs") { + if apiAvailable { + if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: s3.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based s3.cfg") + _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) + } } - } - if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { - logger.Warning("PBS API apply: node config failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based node.cfg") - _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") + if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: datastore.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") + _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) + } } - } - } - - if plan.HasCategoryID("datastore_pbs") { - 
if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: s3.cfg failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based s3.cfg") - _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) + } else if allowFileFallback { + if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: s3.cfg: %v", err) } - } - if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: datastore.cfg failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") - _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) + if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: datastore.cfg: %v", err) } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping datastore.cfg/s3.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_remotes") { - if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: remote.cfg failed: %v", err) - if allowFileFallback { - logger.Warning("PBS staged apply: falling back to file-based remote.cfg") - _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) + if apiAvailable { + if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: remote.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based remote.cfg") + _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) + } + } + } else if allowFileFallback { + if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: remote.cfg: %v", err) } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping remote.cfg: merge mode requires PBS API apply") } } if plan.HasCategoryID("pbs_jobs") { - fallbackApplied := false - applyFallback := func() { - if !allowFileFallback || fallbackApplied { - return + if apiAvailable { + if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: sync jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } } - fallbackApplied = true - logger.Warning("PBS staged apply: falling back to file-based job configs") - _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) - } - - if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: sync jobs failed: %v", err) - applyFallback() - } - if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: verification jobs failed: %v", err) - applyFallback() - } - if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS API apply: prune jobs failed: %v", err) - applyFallback() + if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: verification jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + } + if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + 
logger.Warning("PBS API apply: prune jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + } + } else if allowFileFallback { + if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: job configs: %v", err) + } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping sync/verification/prune configs: merge mode requires PBS API apply") } } - if plan.HasCategoryID("pbs_notifications") { - if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { - logger.Warning("PBS notifications API apply failed: %v", err) - if allowFileFallback { - logger.Warning("PBS notifications API apply failed; falling back to file-based apply") - _ = applyPBSNotificationsFromStage(ctx, logger, stageRoot) - } else { - logger.Warning("PBS notifications API apply failed; skipping apply (merge mode)") - } - } else { - logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) + if plan.HasCategoryID("pbs_tape") { + if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: tape configs: %v", err) } } diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 06ad2bc..2f14d20 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -18,7 +18,6 @@ import ( "syscall" "time" - "github.com/tis24dev/proxsave/internal/backup" "github.com/tis24dev/proxsave/internal/config" "github.com/tis24dev/proxsave/internal/input" "github.com/tis24dev/proxsave/internal/logging" @@ -36,7 +35,6 @@ var ( serviceRetryDelay = 500 * time.Millisecond restoreLogSequence uint64 restoreGlob = filepath.Glob - pveClusterServices = [...]string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} ) // RestoreAbortInfo contains information about an aborted restore with network rollback. @@ -142,8 +140,8 @@ func checkZFSPoolsAfterRestore(logger *logging.Logger) error { } func stopPVEClusterServices(ctx context.Context, logger *logging.Logger) error { - for i := len(pveClusterServices) - 1; i >= 0; i-- { - service := pveClusterServices[i] + services := []string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} + for _, service := range services { if err := stopServiceWithRetries(ctx, logger, service); err != nil { return fmt.Errorf("failed to stop PVE services (%s): %w", service, err) } @@ -152,7 +150,8 @@ func stopPVEClusterServices(ctx context.Context, logger *logging.Logger) error { } func startPVEClusterServices(ctx context.Context, logger *logging.Logger) error { - for _, service := range pveClusterServices { + services := []string{"pve-cluster", "pvedaemon", "pveproxy", "pvestatd"} + for _, service := range services { if err := startServiceWithRetries(ctx, logger, service); err != nil { return fmt.Errorf("failed to start PVE services (%s): %w", service, err) } @@ -1297,7 +1296,7 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log if selectiveMode { shouldExtract := false for _, cat := range categories { - if archiveEntryMatchesCategory(header.Name, cat) { + if PathMatchesCategory(header.Name, cat) { shouldExtract = true break } @@ -1327,12 +1326,6 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log } } - // Reassemble any files that were split into chunks during backup optimization. 
- // This is a no-op when the archive contains no chunked files. - if err := backup.ReassembleChunkedFiles(logger, destRoot); err != nil { - logger.Warning("Chunk reassembly failed: %v", err) - } - // Write detailed log if logFile != nil { fmt.Fprintf(logFile, "=== FILES RESTORED ===\n") @@ -1382,35 +1375,6 @@ func extractArchiveNative(ctx context.Context, archivePath, destRoot string, log return nil } -func archiveEntryMatchesCategory(entryName string, category Category) bool { - if PathMatchesCategory(entryName, category) { - return true - } - - clean := strings.TrimPrefix(strings.TrimSpace(entryName), "./") - - // Marker files created by smart chunking: .chunked - if strings.HasSuffix(clean, ".chunked") { - original := strings.TrimSuffix(clean, ".chunked") - if original != clean && PathMatchesCategory(original, category) { - return true - } - } - - // Chunk files stored under chunked_files/..chunk - if strings.HasPrefix(clean, "chunked_files/") { - trimmed := strings.TrimPrefix(clean, "chunked_files/") - if PathMatchesCategory(trimmed, category) { - return true - } - if original, ok := originalPathFromChunk(trimmed); ok && PathMatchesCategory(original, category) { - return true - } - } - - return false -} - func isRealRestoreFS(fs FS) bool { switch fs.(type) { case osFS, *osFS: diff --git a/internal/orchestrator/restore_access_control_ui.go b/internal/orchestrator/restore_access_control_ui.go index 475d8a5..2fecd71 100644 --- a/internal/orchestrator/restore_access_control_ui.go +++ b/internal/orchestrator/restore_access_control_ui.go @@ -357,16 +357,12 @@ func armAccessControlRollback(ctx context.Context, logger *logging.Logger, backu timeout: timeout, } - if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { - return nil, fmt.Errorf("create rollback log: %w", err) - } - - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildAccessControlRollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -431,6 +427,7 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st } lines := []string{ + "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), @@ -491,3 +488,4 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st ) return strings.Join(lines, "\n") + "\n" } + diff --git a/internal/orchestrator/restore_errors_test.go b/internal/orchestrator/restore_errors_test.go index 17a0ea0..cad33a8 100644 --- a/internal/orchestrator/restore_errors_test.go +++ b/internal/orchestrator/restore_errors_test.go @@ -26,52 +26,12 @@ func TestAnalyzeBackupCategories_OpenError(t *testing.T) { restoreFS = fakeFS logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - _, err := AnalyzeBackupCategories(context.Background(), "/missing/archive.tar", logger) + _, err := AnalyzeBackupCategories("/missing/archive.tar", logger) if err == nil { t.Fatalf("expected error when archive cannot be opened") } } -func TestAnalyzeBackupCategories_TarReadError(t *testing.T) { - orig := restoreFS - defer func() { restoreFS = orig }() - 
fakeFS := NewFakeFS() - defer func() { _ = os.RemoveAll(fakeFS.Root) }() - restoreFS = fakeFS - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - - payload := bytes.Repeat([]byte("a"), 2048) - var buf bytes.Buffer - tw := tar.NewWriter(&buf) - if err := tw.WriteHeader(&tar.Header{Name: "etc/hosts", Mode: 0o644, Size: int64(len(payload))}); err != nil { - t.Fatalf("WriteHeader: %v", err) - } - if _, err := tw.Write(payload); err != nil { - t.Fatalf("Write: %v", err) - } - if err := tw.Close(); err != nil { - t.Fatalf("Close: %v", err) - } - - full := buf.Bytes() - if len(full) < 700 { - t.Fatalf("unexpected tar size: %d", len(full)) - } - truncated := full[:700] - - if err := fakeFS.AddFile("/broken.tar", truncated); err != nil { - t.Fatalf("AddFile: %v", err) - } - - _, err := AnalyzeBackupCategories(context.Background(), "/broken.tar", logger) - if err == nil { - t.Fatalf("expected error for truncated tar archive") - } - if !strings.Contains(err.Error(), "read archive entries") { - t.Fatalf("unexpected error: %v", err) - } -} - func TestRunRestoreCommandStream_UsesStreamingRunner(t *testing.T) { orig := restoreCmd defer func() { restoreCmd = orig }() @@ -231,30 +191,23 @@ func TestStopPVEClusterServices_UsesNoBlock(t *testing.T) { t.Fatalf("expected success stopping PVE services, got %v", err) } - wantStopOrder := []string{ - "systemctl stop --no-block pvestatd", - "systemctl stop --no-block pveproxy", - "systemctl stop --no-block pvedaemon", + wantStops := []string{ "systemctl stop --no-block pve-cluster", + "systemctl stop --no-block pvedaemon", + "systemctl stop --no-block pveproxy", + "systemctl stop --no-block pvestatd", } - indexOfCall := func(calls []string, want string) int { - for i, call := range calls { - if call == want { - return i + for _, cmd := range wantStops { + found := false + for _, call := range fake.Calls { + if call == cmd { + found = true + break } } - return -1 - } - prevIdx := -1 - for _, cmd := range wantStopOrder { - idx := indexOfCall(fake.Calls, cmd) - if idx < 0 { + if !found { t.Fatalf("expected %s to be called, calls: %#v", cmd, fake.Calls) } - if idx <= prevIdx { - t.Fatalf("expected %s to be called after previous stop command, calls: %#v", cmd, fake.Calls) - } - prevIdx = idx } } diff --git a/internal/orchestrator/restore_firewall.go b/internal/orchestrator/restore_firewall.go index 853e5b6..64c7419 100644 --- a/internal/orchestrator/restore_firewall.go +++ b/internal/orchestrator/restore_firewall.go @@ -434,16 +434,12 @@ func armFirewallRollback(ctx context.Context, logger *logging.Logger, backupPath timeout: timeout, } - if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { - return nil, fmt.Errorf("create rollback log: %w", err) - } - - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildFirewallRollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -495,16 +491,11 @@ func disarmFirewallRollback(ctx context.Context, logger *logging.Logger, handle _, _ = restoreCmd.Run(ctx, "systemctl", "stop", timerUnit) _, _ = restoreCmd.Run(ctx, "systemctl", "reset-failed", 
strings.TrimSpace(handle.unitName)+".service", timerUnit) } - - if strings.TrimSpace(handle.scriptPath) != "" { - if err := restoreFS.Remove(handle.scriptPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Warning("Failed to remove firewall rollback script %s: %v", handle.scriptPath, err) - } - } } func buildFirewallRollbackScript(markerPath, backupPath, logPath string) string { lines := []string{ + "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), diff --git a/internal/orchestrator/restore_firewall_test.go b/internal/orchestrator/restore_firewall_test.go index c1fabf6..5ce76d5 100644 --- a/internal/orchestrator/restore_firewall_test.go +++ b/internal/orchestrator/restore_firewall_test.go @@ -1,7 +1,6 @@ package orchestrator import ( - "context" "os" "testing" ) @@ -47,34 +46,6 @@ func TestSyncDirExact_PrunesExtraneousFiles(t *testing.T) { } } -func TestDisarmFirewallRollback_RemovesMarkerAndScript(t *testing.T) { - origFS := restoreFS - t.Cleanup(func() { restoreFS = origFS }) - - fakeFS := NewFakeFS() - restoreFS = fakeFS - - handle := &firewallRollbackHandle{ - markerPath: "/tmp/proxsave/firewall_rollback_pending_test", - scriptPath: "/tmp/proxsave/firewall_rollback_test.sh", - } - if err := fakeFS.AddFile(handle.markerPath, []byte("pending\n")); err != nil { - t.Fatalf("add marker: %v", err) - } - if err := fakeFS.AddFile(handle.scriptPath, []byte("#!/bin/sh\nexit 0\n")); err != nil { - t.Fatalf("add script: %v", err) - } - - disarmFirewallRollback(context.Background(), newTestLogger(), handle) - - if _, err := fakeFS.Stat(handle.markerPath); err == nil || !os.IsNotExist(err) { - t.Fatalf("expected marker to be removed; stat err=%v", err) - } - if _, err := fakeFS.Stat(handle.scriptPath); err == nil || !os.IsNotExist(err) { - t.Fatalf("expected script to be removed; stat err=%v", err) - } -} - func TestApplyPVEFirewallFromStage_AppliesFirewallAndHostFW(t *testing.T) { origFS := restoreFS t.Cleanup(func() { restoreFS = origFS }) diff --git a/internal/orchestrator/restore_ha.go b/internal/orchestrator/restore_ha.go index a48227e..d69db66 100644 --- a/internal/orchestrator/restore_ha.go +++ b/internal/orchestrator/restore_ha.go @@ -364,16 +364,12 @@ func armHARollback(ctx context.Context, logger *logging.Logger, backupPath strin timeout: timeout, } - if err := restoreFS.WriteFile(handle.logPath, []byte(""), 0o600); err != nil { - return nil, fmt.Errorf("create rollback log: %w", err) - } - - if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } script := buildHARollbackScript(handle.markerPath, backupPath, handle.logPath) - if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o600); err != nil { + if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } @@ -430,6 +426,7 @@ func disarmHARollback(ctx context.Context, logger *logging.Logger, handle *haRol func buildHARollbackScript(markerPath, backupPath, logPath string) string { lines := []string{ + "#!/bin/sh", "set -eu", fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), diff --git a/internal/orchestrator/restore_notifications.go b/internal/orchestrator/restore_notifications.go index 6c30459..3774a5d 100644 --- 
a/internal/orchestrator/restore_notifications.go +++ b/internal/orchestrator/restore_notifications.go @@ -53,8 +53,31 @@ func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logge switch plan.SystemType { case SystemTypePBS: - // PBS notification restore is applied as part of the final PBS API staged apply phase - // to avoid restarting PBS services before other file-based staged apply steps complete. + if !plan.HasCategoryID("pbs_notifications") { + return nil + } + behavior := plan.PBSRestoreBehavior + strict := behavior == PBSRestoreBehaviorClean + allowFileFallback := behavior == PBSRestoreBehaviorClean + + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if allowFileFallback { + logger.Warning("PBS notifications API apply unavailable; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + logger.Warning("PBS notifications API apply unavailable; skipping apply (merge mode): %v", err) + return nil + } + + if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { + if allowFileFallback { + logger.Warning("PBS notifications API apply failed; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + logger.Warning("PBS notifications API apply failed; skipping apply (merge mode): %v", err) + return nil + } + logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) return nil case SystemTypePVE: if !plan.HasCategoryID("pve_notifications") { diff --git a/internal/orchestrator/restore_workflow_test.go b/internal/orchestrator/restore_workflow_test.go index f1ee4c2..e6d836f 100644 --- a/internal/orchestrator/restore_workflow_test.go +++ b/internal/orchestrator/restore_workflow_test.go @@ -3,7 +3,6 @@ package orchestrator import ( "archive/tar" "context" - "errors" "os" "path/filepath" "testing" @@ -80,7 +79,6 @@ func TestRunRestoreWorkflow_CustomModeNoCategories_Succeeds(t *testing.T) { Manifest: &backup.Manifest{ CreatedAt: time.Unix(1700000000, 0), ClusterMode: "standalone", - ProxmoxType: "pve", ScriptVersion: "1.0.0", }, } @@ -132,7 +130,6 @@ func TestRunRestoreWorkflow_ConfirmFalseAborts(t *testing.T) { Manifest: &backup.Manifest{ CreatedAt: time.Unix(1700000000, 0), ClusterMode: "standalone", - ProxmoxType: "pve", ScriptVersion: "1.0.0", }, } @@ -157,108 +154,3 @@ func TestRunRestoreWorkflow_ConfirmFalseAborts(t *testing.T) { t.Fatalf("err=%v; want %v", err, ErrRestoreAborted) } } - -func TestRunRestoreWorkflow_AnalysisFailure_FallsBackToSafeFullRestore(t *testing.T) { - origRestoreFS := restoreFS - origRestoreCmd := restoreCmd - origRestoreSystem := restoreSystem - origRestoreTime := restoreTime - origCompatFS := compatFS - origPrepare := prepareRestoreBundleFunc - origAnalyze := analyzeBackupCategoriesFunc - origSafetyFS := safetyFS - origSafetyNow := safetyNow - t.Cleanup(func() { - restoreFS = origRestoreFS - restoreCmd = origRestoreCmd - restoreSystem = origRestoreSystem - restoreTime = origRestoreTime - compatFS = origCompatFS - prepareRestoreBundleFunc = origPrepare - analyzeBackupCategoriesFunc = origAnalyze - safetyFS = origSafetyFS - safetyNow = origSafetyNow - }) - - fakeFS := NewFakeFS() - t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) - restoreFS = fakeFS - compatFS = fakeFS - safetyFS = fakeFS - - fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} - restoreTime = fakeNow - safetyNow = fakeNow.Now - - // Make compatibility detection treat this as PBS (to 
avoid compatibility prompts). - if err := fakeFS.AddDir("/etc/proxmox-backup"); err != nil { - t.Fatalf("fakeFS.AddDir: %v", err) - } - restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} - - restoreCmd = &FakeCommandRunner{ - Outputs: map[string][]byte{ - "ip route show default": []byte(""), - }, - Errors: map[string]error{}, - } - - tmpTar := filepath.Join(t.TempDir(), "bundle.tar") - if err := writeTarFile(tmpTar, map[string]string{ - "etc/hosts": "127.0.0.1 localhost\n", - }); err != nil { - t.Fatalf("writeTarFile: %v", err) - } - tarBytes, err := os.ReadFile(tmpTar) - if err != nil { - t.Fatalf("ReadFile tar: %v", err) - } - if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { - t.Fatalf("fakeFS.WriteFile: %v", err) - } - - prepareRestoreBundleFunc = func(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { - cand := &decryptCandidate{ - DisplayBase: "test", - Manifest: &backup.Manifest{ - CreatedAt: fakeNow.Now(), - ClusterMode: "standalone", - ProxmoxType: "pbs", - ScriptVersion: "vtest", - }, - } - prepared := &preparedBundle{ - ArchivePath: "/bundle.tar", - Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, - cleanup: func() {}, - } - return cand, prepared, nil - } - - analyzeBackupCategoriesFunc = func(ctx context.Context, archivePath string, logger *logging.Logger) ([]Category, error) { - return nil, errors.New("simulated analysis failure") - } - - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - cfg := &config.Config{BaseDir: "/base"} - ui := &fakeRestoreWorkflowUI{ - confirmRestore: true, - modeErr: errors.New("unexpected SelectRestoreMode call"), - } - - if err := runRestoreWorkflowWithUI(context.Background(), cfg, logger, "vtest", ui); err != nil { - t.Fatalf("runRestoreWorkflowWithUI error: %v", err) - } - - data, err := fakeFS.ReadFile("/tmp/proxsave/restore_backup_location.txt") - if err != nil { - t.Fatalf("expected safety backup location file: %v", err) - } - want := "/tmp/proxsave/restore_backup_20200102_030405.tar.gz" - if got := string(data); got != want { - t.Fatalf("restore_backup_location.txt=%q want %q", got, want) - } - if _, err := fakeFS.Stat(want); err != nil { - t.Fatalf("expected safety backup archive %s to exist: %v", want, err) - } -} diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index 5244f6c..f90c1ad 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -17,7 +17,6 @@ import ( ) var prepareRestoreBundleFunc = prepareRestoreBundleWithUI -var analyzeBackupCategoriesFunc = AnalyzeBackupCategories func prepareRestoreBundleWithUI(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { candidate, err := selectBackupCandidateWithUI(ctx, ui, cfg, logger, false) @@ -46,10 +45,6 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l done := logging.DebugStart(logger, "restore workflow (ui)", "version=%s", version) defer func() { done(err) }() - if removed, failed := cleanupOldRestoreStageDirs(restoreFS, logger, nowRestore(), tempDirCleanupAge); removed > 0 || failed > 0 { - logger.Debug("Restore staging cleanup (older than %s): removed=%d failed=%d", tempDirCleanupAge, removed, failed) - } - restoreHadWarnings := false defer func() { if err == nil { @@ -93,63 +88,42 @@ func 
runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } logger.Info("Analyzing backup contents...") - availableCategories, analysisErr := analyzeBackupCategoriesFunc(ctx, prepared.ArchivePath, logger) + availableCategories, err := AnalyzeBackupCategories(prepared.ArchivePath, logger) + if err != nil { + logger.Warning("Could not analyze categories: %v", err) + logger.Info("Falling back to full restore mode") + return runFullRestoreWithUI(ctx, ui, candidate, prepared, destRoot, logger, cfg.DryRun) + } var ( mode RestoreMode selectedCategories []Category ) - - if analysisErr != nil { - logger.Warning("Backup category analysis failed: %v", analysisErr) - if err := ui.ShowMessage(ctx, "Safe full restore (analysis unavailable)", - "Backup category analysis failed.\n\n"+ - "ProxSave will proceed with a SAFE full restore.\n\n"+ - "Safety features remain enabled (staging, transactional apply with rollback timers, and safety backups).\n\n"+ - "Note: Categories missing from the archive will be skipped automatically."); err != nil { + for { + mode, err = ui.SelectRestoreMode(ctx, systemType) + if err != nil { return err } - mode = RestoreModeFull - - backupType := DetectBackupType(candidate.Manifest) - switch backupType { - case SystemTypePVE, SystemTypePBS: - availableCategories = GetCategoriesForSystem(string(backupType)) - if len(availableCategories) == 0 { - availableCategories = GetAllCategories() - } - default: - availableCategories = GetAllCategories() + if mode != RestoreModeCustom { + selectedCategories = GetCategoriesForMode(mode, systemType, availableCategories) + break } - selectedCategories = append([]Category{}, availableCategories...) - } else { - for { - mode, err = ui.SelectRestoreMode(ctx, systemType) - if err != nil { - return err - } - - if mode != RestoreModeCustom { - selectedCategories = GetCategoriesForMode(mode, systemType, availableCategories) - break - } - selectedCategories, err = ui.SelectCategories(ctx, availableCategories, systemType) - if err != nil { - if errors.Is(err, errRestoreBackToMode) { - continue - } - return err + selectedCategories, err = ui.SelectCategories(ctx, availableCategories, systemType) + if err != nil { + if errors.Is(err, errRestoreBackToMode) { + continue } - break + return err } + break + } - if mode == RestoreModeCustom { - selectedCategories, err = maybeAddRecommendedCategoriesForTFA(ctx, ui, logger, selectedCategories, availableCategories) - if err != nil { - return err - } + if mode == RestoreModeCustom { + selectedCategories, err = maybeAddRecommendedCategoriesForTFA(ctx, ui, logger, selectedCategories, availableCategories) + if err != nil { + return err } } @@ -523,29 +497,13 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l stageLogPath := "" stageRoot := "" - stageWarningsStart := int64(0) - stageNetworkInstalled := false - preserveStage := preserveRestoreStagingFromEnv() || cfg.DryRun if len(plan.StagedCategories) > 0 { - stageRoot, err = createRestoreStageDir() - if err != nil { - return fmt.Errorf("failed to create staging directory: %w", err) - } + stageRoot = stageDestRoot() logger.Info("") logger.Info("Staging %d sensitive category(ies) to: %s", len(plan.StagedCategories), stageRoot) - if logger != nil { - stageWarningsStart = logger.WarningCount() + if err := restoreFS.MkdirAll(stageRoot, 0o755); err != nil { + return fmt.Errorf("failed to create staging directory %s: %w", stageRoot, err) } - defer func() { - if strings.TrimSpace(stageRoot) == "" || preserveStage { - return - 
} - if cleanupErr := restoreFS.RemoveAll(stageRoot); cleanupErr != nil { - logger.Warning("Failed to remove staging directory %s: %v", stageRoot, cleanupErr) - } else { - logger.Debug("Staging directory removed: %s", stageRoot) - } - }() if stageLog, err := extractSelectiveArchive(ctx, prepared.ArchivePath, stageRoot, plan.StagedCategories, RestoreModeCustom, logger); err != nil { if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { @@ -624,13 +582,6 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l restoreHadWarnings = true logger.Warning("Notifications staged apply: %v", err) } - if err := maybeApplyPBSConfigsViaAPIFromStage(ctx, logger, plan, stageRoot, cfg.DryRun, pbsServicesStopped); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err - } - restoreHadWarnings = true - logger.Warning("PBS staged API apply: %v", err) - } } stageRootForNetworkApply := stageRoot @@ -641,7 +592,6 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l restoreHadWarnings = true logger.Warning("Network staged install: %v", err) } else if installed { - stageNetworkInstalled = true stageRootForNetworkApply = "" logging.DebugStep(logger, "restore", "Network staged install completed: configuration written to /etc (no reload); live apply will use system paths") } @@ -797,17 +747,8 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } } - if strings.TrimSpace(stageRoot) != "" { - if plan != nil && plan.HasCategoryID("network") && !stageNetworkInstalled { - preserveStage = true - } - if logger != nil && logger.WarningCount() > stageWarningsStart { - preserveStage = true - } - } - logger.Info("") - if restoreHadWarnings || (logger != nil && logger.HasWarnings()) { + if restoreHadWarnings { logger.Warning("Restore completed with warnings.") } else { logger.Info("Restore completed successfully.") @@ -824,12 +765,7 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Info("Export detailed log: %s", exportLogPath) } if stageRoot != "" { - if preserveStage { - logger.Info("Staging directory (preserved): %s", stageRoot) - logger.Warning("Staging directory contains sensitive files. 
Remove it when no longer needed: rm -rf %s", stageRoot) - } else { - logger.Info("Staging directory (auto-cleanup): %s", stageRoot) - } + logger.Info("Staging directory: %s", stageRoot) } if stageLogPath != "" { logger.Info("Staging detailed log: %s", stageLogPath) diff --git a/internal/orchestrator/restore_workflow_warnings_test.go b/internal/orchestrator/restore_workflow_warnings_test.go index 6bb31eb..3da4b6b 100644 --- a/internal/orchestrator/restore_workflow_warnings_test.go +++ b/internal/orchestrator/restore_workflow_warnings_test.go @@ -1,13 +1,11 @@ package orchestrator import ( - "bytes" "context" "errors" "io/fs" "os" "path/filepath" - "strings" "testing" "time" @@ -132,93 +130,3 @@ func TestRunRestoreWorkflow_FstabMergeFails_ContinuesWithWarnings(t *testing.T) t.Fatalf("expected warnings") } } - -func TestRunRestoreWorkflow_FinalSummaryReflectsLoggedWarnings(t *testing.T) { - origRestoreFS := restoreFS - origRestoreCmd := restoreCmd - origRestoreSystem := restoreSystem - origRestoreTime := restoreTime - origCompatFS := compatFS - origPrepare := prepareRestoreBundleFunc - origSafetyFS := safetyFS - origSafetyNow := safetyNow - t.Cleanup(func() { - restoreFS = origRestoreFS - restoreCmd = origRestoreCmd - restoreSystem = origRestoreSystem - restoreTime = origRestoreTime - compatFS = origCompatFS - prepareRestoreBundleFunc = origPrepare - safetyFS = origSafetyFS - safetyNow = origSafetyNow - }) - - fakeFS := NewFakeFS() - t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) - restoreFS = fakeFS - compatFS = fakeFS - safetyFS = fakeFS - - fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} - restoreTime = fakeNow - safetyNow = fakeNow.Now - - restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} - restoreCmd = runOnlyRunner{} - - // Make compatibility detection treat this as PVE. - if err := fakeFS.AddFile("/usr/bin/qm", []byte("x")); err != nil { - t.Fatalf("fakeFS.AddFile: %v", err) - } - - // Minimal backup tar with one file from the "services" category. 
- tmpTar := filepath.Join(t.TempDir(), "bundle.tar") - if err := writeTarFile(tmpTar, map[string]string{ - "etc/timezone": "UTC\n", - }); err != nil { - t.Fatalf("writeTarFile: %v", err) - } - tarBytes, err := os.ReadFile(tmpTar) - if err != nil { - t.Fatalf("os.ReadFile: %v", err) - } - if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { - t.Fatalf("fakeFS.WriteFile(/bundle.tar): %v", err) - } - - prepareRestoreBundleFunc = func(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string, ui RestoreWorkflowUI) (*decryptCandidate, *preparedBundle, error) { - cand := &decryptCandidate{ - DisplayBase: "test", - Manifest: &backup.Manifest{ - CreatedAt: fakeNow.Now(), - ClusterMode: "standalone", - ProxmoxType: "pbs", // force incompatibility warning on a PVE system - ScriptVersion: "vtest", - }, - } - prepared := &preparedBundle{ - ArchivePath: "/bundle.tar", - Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, - cleanup: func() {}, - } - return cand, prepared, nil - } - - var out bytes.Buffer - logger := logging.New(types.LogLevelInfo, false) - logger.SetOutput(&out) - cfg := &config.Config{BaseDir: "/base"} - ui := &fakeRestoreWorkflowUI{ - mode: RestoreModeCustom, - categories: []Category{mustCategoryByID(t, "services")}, - confirmRestore: true, - confirmCompatible: true, - } - - if err := runRestoreWorkflowWithUI(context.Background(), cfg, logger, "vtest", ui); err != nil { - t.Fatalf("runRestoreWorkflowWithUI error: %v", err) - } - if !strings.Contains(out.String(), "Restore completed with warnings.") { - t.Fatalf("expected final summary to report warnings; got output:\n%s", out.String()) - } -} diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index 733e1bc..4b05ea2 100644 --- a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -6,7 +6,6 @@ import ( "context" "errors" "fmt" - "io" "os" "path" "sort" @@ -26,17 +25,12 @@ type SelectiveRestoreConfig struct { Metadata *backup.Manifest } -// AnalyzeBackupCategories detects which categories are available in the backup. -// It scans the archive streamingly (O(1) memory) and supports cancellation via ctx. -func AnalyzeBackupCategories(ctx context.Context, archivePath string, logger *logging.Logger) (categories []Category, err error) { +// AnalyzeBackupCategories detects which categories are available in the backup +func AnalyzeBackupCategories(archivePath string, logger *logging.Logger) (categories []Category, err error) { done := logging.DebugStart(logger, "analyze backup categories", "archive=%s", archivePath) defer func() { done(err) }() logger.Info("Analyzing backup categories...") - if ctx == nil { - ctx = context.Background() - } - // Open the archive and read all entry names file, err := restoreFS.Open(archivePath) if err != nil { @@ -45,63 +39,22 @@ func AnalyzeBackupCategories(ctx context.Context, archivePath string, logger *lo defer file.Close() // Create appropriate reader based on compression - reader, err := createDecompressionReader(ctx, file, archivePath) + reader, err := createDecompressionReader(context.Background(), file, archivePath) if err != nil { return nil, err } - if closer, ok := reader.(interface{ Close() error }); ok { - // When the archive isn't compressed, createDecompressionReader returns the same *os.File. - // Avoid double-closing; the underlying file is already closed by the defer above. 
- if rf, ok := reader.(*os.File); !ok || rf != file { - defer closer.Close() + defer func() { + if closer, ok := reader.(interface{ Close() error }); ok { + closer.Close() } - } + }() tarReader := tar.NewReader(reader) - allCategories := GetAllCategories() - if len(allCategories) == 0 { - return nil, nil - } - found := make([]bool, len(allCategories)) - foundCount := 0 - entriesScanned := 0 -scanLoop: - for { - if err := ctx.Err(); err != nil { - return nil, err - } - header, err := tarReader.Next() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return nil, fmt.Errorf("read archive entries: %w", err) - } - entriesScanned++ + archivePaths := collectArchivePaths(tarReader) + logger.Debug("Found %d entries in archive", len(archivePaths)) - for i := range allCategories { - if found[i] { - continue - } - if archiveEntryMatchesCategory(header.Name, allCategories[i]) { - found[i] = true - allCategories[i].IsAvailable = true - foundCount++ - if foundCount == len(allCategories) { - break scanLoop - } - } - } - } - logger.Debug("Scanned %d entries in archive", entriesScanned) - - availableCategories := make([]Category, 0, foundCount) - for i, cat := range allCategories { - if found[i] { - availableCategories = append(availableCategories, cat) - } - } + availableCategories := AnalyzeArchivePaths(archivePaths, GetAllCategories()) for _, cat := range availableCategories { logger.Debug("Category available: %s (%s)", cat.ID, cat.Name) } @@ -140,83 +93,52 @@ func AnalyzeArchivePaths(archivePaths []string, allCategories []Category) []Cate return availableCategories } -func collectArchivePaths(tarReader *tar.Reader) ([]string, error) { +func collectArchivePaths(tarReader *tar.Reader) []string { var archivePaths []string for { header, err := tarReader.Next() if err != nil { - if errors.Is(err, io.EOF) { - return archivePaths, nil - } - return archivePaths, err + break // EOF or error } archivePaths = append(archivePaths, header.Name) } + return archivePaths } // pathMatchesPattern checks if an archive path matches a category pattern func pathMatchesPattern(archivePath, pattern string) bool { - match := func(archivePath, pattern string) bool { - // Normalize paths - normArchive := archivePath - if !strings.HasPrefix(normArchive, "./") { - normArchive = "./" + normArchive - } - - normPattern := pattern - if !strings.HasPrefix(normPattern, "./") { - normPattern = "./" + normPattern - } - - if strings.ContainsAny(normPattern, "*?[") && !strings.HasSuffix(normPattern, "/") { - if ok, err := path.Match(normPattern, normArchive); err == nil && ok { - return true - } - } - - // Exact match - if normArchive == normPattern { - return true - } + // Normalize paths + normArchive := archivePath + if !strings.HasPrefix(normArchive, "./") { + normArchive = "./" + normArchive + } - // Directory prefix match - if strings.HasSuffix(normPattern, "/") { - if strings.HasPrefix(normArchive, normPattern) { - return true - } - } + normPattern := pattern + if !strings.HasPrefix(normPattern, "./") { + normPattern = "./" + normPattern + } - // Parent directory match - if strings.HasPrefix(normArchive, strings.TrimSuffix(normPattern, "/")+"/") { + if strings.ContainsAny(normPattern, "*?[") && !strings.HasSuffix(normPattern, "/") { + if ok, err := path.Match(normPattern, normArchive); err == nil && ok { return true } - - return false } - // Smart chunking stores large files as: - // - .chunked (marker file) - // - chunked_files/..chunk (chunk store) - // For category analysis, map these artifacts back to the original 
path. - candidates := []string{archivePath} - clean := strings.TrimPrefix(strings.TrimSpace(archivePath), "./") - - if strings.HasSuffix(clean, ".chunked") { - candidates = append(candidates, strings.TrimSuffix(clean, ".chunked")) + // Exact match + if normArchive == normPattern { + return true } - if strings.HasPrefix(clean, "chunked_files/") { - trimmed := strings.TrimPrefix(clean, "chunked_files/") - candidates = append(candidates, trimmed) - if original, ok := originalPathFromChunk(trimmed); ok { - candidates = append(candidates, original) + // Directory prefix match + if strings.HasSuffix(normPattern, "/") { + if strings.HasPrefix(normArchive, normPattern) { + return true } } - for _, candidate := range candidates { - if match(candidate, pattern) { - return true - } + // Parent directory match + if strings.HasPrefix(normArchive, strings.TrimSuffix(normPattern, "/")+"/") { + return true } return false @@ -361,7 +283,10 @@ func ShowCategorySelectionMenuWithReader(ctx context.Context, reader *bufio.Read selected = make(map[int]bool) case "c": // Continue - check if at least one category is selected - selectedCount := len(selected) + selectedCount := 0 + for range selected { + selectedCount++ + } if selectedCount == 0 { fmt.Println() @@ -392,11 +317,7 @@ func ShowCategorySelectionMenuWithReader(ctx context.Context, reader *bufio.Read // Toggle selection index := num - 1 - if selected[index] { - delete(selected, index) - } else { - selected[index] = true - } + selected[index] = !selected[index] } } } diff --git a/internal/orchestrator/selective_additional_test.go b/internal/orchestrator/selective_additional_test.go index e40256f..e6fac52 100644 --- a/internal/orchestrator/selective_additional_test.go +++ b/internal/orchestrator/selective_additional_test.go @@ -42,10 +42,7 @@ func TestCollectArchivePaths(t *testing.T) { } tr := tar.NewReader(bytes.NewReader(buf.Bytes())) - got, err := collectArchivePaths(tr) - if err != nil { - t.Fatalf("collectArchivePaths() error: %v", err) - } + got := collectArchivePaths(tr) if len(got) != len(entries) { t.Fatalf("collectArchivePaths() len=%d; want %d (paths=%v)", len(got), len(entries), got) diff --git a/internal/orchestrator/selective_pure_test.go b/internal/orchestrator/selective_pure_test.go index e79d4fe..7f0d766 100644 --- a/internal/orchestrator/selective_pure_test.go +++ b/internal/orchestrator/selective_pure_test.go @@ -11,8 +11,7 @@ func TestAnalyzeArchivePaths(t *testing.T) { paths := []string{ "./etc/pve/storage.cfg", - "./etc/network/interfaces.chunked", - "./chunked_files/etc/network/interfaces.001.chunk", + "./etc/network/interfaces", "./random/file", } @@ -41,8 +40,6 @@ func TestPathMatchesPatternVariants(t *testing.T) { }{ {"etc/pve/storage.cfg", "./etc/pve/", true}, {"./etc/network/interfaces", "./etc/network/interfaces", true}, - {"./etc/network/interfaces.chunked", "./etc/network/interfaces", true}, - {"./chunked_files/etc/network/interfaces.001.chunk", "./etc/network/interfaces", true}, {"./etc/network/interfaces.d/foo", "./etc/network/interfaces", false}, {"./var/log/syslog", "./etc/network/", false}, } diff --git a/internal/orchestrator/staging.go b/internal/orchestrator/staging.go index a418f0f..b3ebdaf 100644 --- a/internal/orchestrator/staging.go +++ b/internal/orchestrator/staging.go @@ -2,14 +2,13 @@ package orchestrator import ( "fmt" - "os" "path/filepath" "strings" - "time" - - "github.com/tis24dev/proxsave/internal/logging" + "sync/atomic" ) +var restoreStageSequence uint64 + func isStagedCategoryID(id string) bool { 
switch strings.TrimSpace(id) { case "network", @@ -48,70 +47,8 @@ func splitRestoreCategories(categories []Category) (normal []Category, staged [] return normal, staged, export } -func createRestoreStageDir() (string, error) { - base := "/tmp/proxsave" - if err := restoreFS.MkdirAll(base, 0o755); err != nil { - return "", fmt.Errorf("ensure staging base directory %s: %w", base, err) - } - - pattern := fmt.Sprintf("restore-stage-%s_pid%d-", nowRestore().Format("20060102-150405"), os.Getpid()) - dir, err := restoreFS.MkdirTemp(base, pattern) - if err != nil { - return "", fmt.Errorf("create staging directory under %s: %w", base, err) - } - return dir, nil -} - -func preserveRestoreStagingFromEnv() bool { - v := strings.TrimSpace(os.Getenv("PROXSAVE_PRESERVE_RESTORE_STAGING")) - if v == "" { - return false - } - switch strings.ToLower(v) { - case "1", "true", "yes", "y", "on": - return true - default: - return false - } -} - -func cleanupOldRestoreStageDirs(fs FS, logger *logging.Logger, now time.Time, maxAge time.Duration) (removed int, failed int) { +func stageDestRoot() string { base := "/tmp/proxsave" - entries, err := fs.ReadDir(base) - if err != nil { - return 0, 0 - } - - cutoff := now.Add(-maxAge) - for _, entry := range entries { - if entry == nil || !entry.IsDir() { - continue - } - name := strings.TrimSpace(entry.Name()) - if name == "" || !strings.HasPrefix(name, "restore-stage-") { - continue - } - fullPath := filepath.Join(base, name) - info, err := fs.Stat(fullPath) - if err != nil || info == nil || !info.IsDir() { - continue - } - if info.ModTime().After(cutoff) { - continue - } - - if err := fs.RemoveAll(fullPath); err != nil { - failed++ - if logger != nil { - logger.Debug("Failed to cleanup restore staging directory %s: %v", fullPath, err) - } - continue - } - removed++ - if logger != nil { - logger.Debug("Cleaned old restore staging directory: %s", fullPath) - } - } - - return removed, failed + seq := atomic.AddUint64(&restoreStageSequence, 1) + return filepath.Join(base, fmt.Sprintf("restore-stage-%s_%d", nowRestore().Format("20060102-150405"), seq)) } diff --git a/internal/pbs/namespaces.go b/internal/pbs/namespaces.go index 92bf869..345ac56 100644 --- a/internal/pbs/namespaces.go +++ b/internal/pbs/namespaces.go @@ -2,14 +2,18 @@ package pbs import ( "bytes" + "context" "encoding/json" + "errors" "fmt" - "os" "os/exec" "path/filepath" + "time" + + "github.com/tis24dev/proxsave/internal/safefs" ) -var execCommand = exec.Command +var execCommand = exec.CommandContext // Namespace represents a single PBS namespace. type Namespace struct { @@ -26,12 +30,15 @@ type listNamespacesResponse struct { // ListNamespaces tries the PBS CLI first and, if it fails, // falls back to the filesystem to infer namespaces. 
-func ListNamespaces(datastoreName, datastorePath string) ([]Namespace, bool, error) { - if namespaces, err := listNamespacesViaCLI(datastoreName); err == nil { +func ListNamespaces(ctx context.Context, datastoreName, datastorePath string, ioTimeout time.Duration) ([]Namespace, bool, error) { + if namespaces, err := listNamespacesViaCLI(ctx, datastoreName); err == nil { return namespaces, false, nil } + if err := ctx.Err(); err != nil { + return nil, false, err + } - namespaces, err := discoverNamespacesFromFilesystem(datastorePath) + namespaces, err := discoverNamespacesFromFilesystem(ctx, datastorePath, ioTimeout) if err != nil { return nil, false, err } @@ -39,8 +46,13 @@ func ListNamespaces(datastoreName, datastorePath string) ([]Namespace, bool, err return namespaces, true, nil } -func listNamespacesViaCLI(datastore string) ([]Namespace, error) { +func listNamespacesViaCLI(ctx context.Context, datastore string) ([]Namespace, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + cmd := execCommand( + ctx, "proxmox-backup-manager", "datastore", "namespace", @@ -65,12 +77,12 @@ func listNamespacesViaCLI(datastore string) ([]Namespace, error) { return parsed.Data, nil } -func discoverNamespacesFromFilesystem(datastorePath string) ([]Namespace, error) { +func discoverNamespacesFromFilesystem(ctx context.Context, datastorePath string, ioTimeout time.Duration) ([]Namespace, error) { if datastorePath == "" { return nil, fmt.Errorf("datastore path is empty") } - entries, err := os.ReadDir(datastorePath) + entries, err := safefs.ReadDir(ctx, datastorePath, ioTimeout) if err != nil { return nil, fmt.Errorf("cannot read datastore path %s: %w", datastorePath, err) } @@ -91,12 +103,14 @@ func discoverNamespacesFromFilesystem(datastorePath string) ([]Namespace, error) subPath := filepath.Join(datastorePath, entry.Name()) for _, chk := range checkDirs { - if _, err := os.Stat(filepath.Join(subPath, chk)); err == nil { + if _, err := safefs.Stat(ctx, filepath.Join(subPath, chk), ioTimeout); err == nil { namespaces = append(namespaces, Namespace{ Ns: entry.Name(), Path: subPath, }) break + } else if errors.Is(err, safefs.ErrTimeout) { + return nil, err } } } diff --git a/internal/pbs/namespaces_test.go b/internal/pbs/namespaces_test.go index 494525d..ac358b9 100644 --- a/internal/pbs/namespaces_test.go +++ b/internal/pbs/namespaces_test.go @@ -1,6 +1,7 @@ package pbs import ( + "context" "encoding/json" "fmt" "os" @@ -65,7 +66,7 @@ func TestDiscoverNamespacesFromFilesystem_DetectsSupportedDirs(t *testing.T) { mustMkdirAll(t, filepath.Join(tmpDir, "host-ns", "host")) mustMkdirAll(t, filepath.Join(tmpDir, "nested-ns", "namespace")) - namespaces, err := discoverNamespacesFromFilesystem(tmpDir) + namespaces, err := discoverNamespacesFromFilesystem(context.Background(), tmpDir, 0) if err != nil { t.Fatalf("discover failed: %v", err) } @@ -104,7 +105,7 @@ func TestDiscoverNamespacesFromFilesystem_IgnoresNonDirectories(t *testing.T) { mustWriteFile(t, filepath.Join(tmpDir, "some-file.txt"), []byte("ignore me")) mustMkdirAll(t, filepath.Join(tmpDir, "valid-ns", "vm")) - namespaces, err := discoverNamespacesFromFilesystem(tmpDir) + namespaces, err := discoverNamespacesFromFilesystem(context.Background(), tmpDir, 0) if err != nil { t.Fatalf("discover failed: %v", err) } @@ -119,12 +120,12 @@ func TestDiscoverNamespacesFromFilesystem_IgnoresNonDirectories(t *testing.T) { } func TestDiscoverNamespacesFromFilesystem_Errors(t *testing.T) { - if _, err := discoverNamespacesFromFilesystem(""); err == 
nil || !strings.Contains(err.Error(), "datastore path is empty") { + if _, err := discoverNamespacesFromFilesystem(context.Background(), "", 0); err == nil || !strings.Contains(err.Error(), "datastore path is empty") { t.Fatalf("expected error for empty path, got %v", err) } missing := filepath.Join(t.TempDir(), "missing") - if _, err := discoverNamespacesFromFilesystem(missing); err == nil || !strings.Contains(err.Error(), "cannot read datastore path") { + if _, err := discoverNamespacesFromFilesystem(context.Background(), missing, 0); err == nil || !strings.Contains(err.Error(), "cannot read datastore path") { t.Fatalf("expected error for missing path, got %v", err) } } @@ -132,7 +133,7 @@ func TestDiscoverNamespacesFromFilesystem_Errors(t *testing.T) { func TestListNamespaces_CLISuccess(t *testing.T) { setExecCommandStub(t, "cli-success") - namespaces, usedFallback, err := ListNamespaces("dummy", t.TempDir()) + namespaces, usedFallback, err := ListNamespaces(context.Background(), "dummy", t.TempDir(), 0) if err != nil { t.Fatalf("ListNamespaces failed: %v", err) } @@ -155,7 +156,7 @@ func TestListNamespaces_CLIFallback(t *testing.T) { tmpDir := t.TempDir() mustMkdirAll(t, filepath.Join(tmpDir, "local", "vm")) - namespaces, usedFallback, err := ListNamespaces("dummy", tmpDir) + namespaces, usedFallback, err := ListNamespaces(context.Background(), "dummy", tmpDir, 0) if err != nil { t.Fatalf("ListNamespaces failed: %v", err) } @@ -171,7 +172,7 @@ func TestListNamespaces_CLIFallback(t *testing.T) { func TestListNamespacesViaCLI_ErrorIncludesStderr(t *testing.T) { setExecCommandStub(t, "cli-error") - if _, err := listNamespacesViaCLI("dummy"); err == nil || !strings.Contains(err.Error(), "stderr: CLI exploded") { + if _, err := listNamespacesViaCLI(context.Background(), "dummy"); err == nil || !strings.Contains(err.Error(), "stderr: CLI exploded") { t.Fatalf("expected stderr text in error, got %v", err) } } @@ -197,7 +198,7 @@ func TestHelperProcess(t *testing.T) { func setExecCommandStub(t *testing.T, scenario string) { t.Helper() original := execCommand - execCommand = func(string, ...string) *exec.Cmd { + execCommand = func(context.Context, string, ...string) *exec.Cmd { cmd := exec.Command(os.Args[0], "-test.run=TestHelperProcess", "--") cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1", diff --git a/internal/safefs/safefs.go b/internal/safefs/safefs.go new file mode 100644 index 0000000..36b001a --- /dev/null +++ b/internal/safefs/safefs.go @@ -0,0 +1,155 @@ +package safefs + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "syscall" + "time" +) + +var ( + osStat = os.Stat + osReadDir = os.ReadDir + syscallStatfs = syscall.Statfs +) + +// ErrTimeout is a sentinel error used to classify filesystem operations that did not +// complete within the configured timeout. +var ErrTimeout = errors.New("filesystem operation timed out") + +// TimeoutError is returned when a filesystem operation exceeds its allowed duration. +// Note that this does not cancel the underlying kernel call; it only stops waiting. 
+type TimeoutError struct { + Op string + Path string + Timeout time.Duration +} + +func (e *TimeoutError) Error() string { + if e == nil { + return "filesystem operation timed out" + } + if e.Timeout > 0 { + return fmt.Sprintf("%s %s: timeout after %s", e.Op, e.Path, e.Timeout) + } + return fmt.Sprintf("%s %s: timeout", e.Op, e.Path) +} + +func (e *TimeoutError) Unwrap() error { return ErrTimeout } + +func effectiveTimeout(ctx context.Context, timeout time.Duration) time.Duration { + if timeout <= 0 { + return 0 + } + if deadline, ok := ctx.Deadline(); ok { + remaining := time.Until(deadline) + if remaining <= 0 { + return 0 + } + if remaining < timeout { + return remaining + } + } + return timeout +} + +func Stat(ctx context.Context, path string, timeout time.Duration) (fs.FileInfo, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + return osStat(path) + } + + type result struct { + info fs.FileInfo + err error + } + ch := make(chan result, 1) + go func() { + info, err := osStat(path) + ch <- result{info: info, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.info, r.err + case <-ctx.Done(): + return nil, ctx.Err() + case <-timer.C: + return nil, &TimeoutError{Op: "stat", Path: path, Timeout: timeout} + } +} + +func ReadDir(ctx context.Context, path string, timeout time.Duration) ([]os.DirEntry, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + return osReadDir(path) + } + + type result struct { + entries []os.DirEntry + err error + } + ch := make(chan result, 1) + go func() { + entries, err := osReadDir(path) + ch <- result{entries: entries, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.entries, r.err + case <-ctx.Done(): + return nil, ctx.Err() + case <-timer.C: + return nil, &TimeoutError{Op: "readdir", Path: path, Timeout: timeout} + } +} + +func Statfs(ctx context.Context, path string, timeout time.Duration) (syscall.Statfs_t, error) { + if err := ctx.Err(); err != nil { + return syscall.Statfs_t{}, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + var stat syscall.Statfs_t + return stat, syscallStatfs(path, &stat) + } + + type result struct { + stat syscall.Statfs_t + err error + } + ch := make(chan result, 1) + go func() { + var stat syscall.Statfs_t + err := syscallStatfs(path, &stat) + ch <- result{stat: stat, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.stat, r.err + case <-ctx.Done(): + return syscall.Statfs_t{}, ctx.Err() + case <-timer.C: + return syscall.Statfs_t{}, &TimeoutError{Op: "statfs", Path: path, Timeout: timeout} + } +} diff --git a/internal/safefs/safefs_test.go b/internal/safefs/safefs_test.go new file mode 100644 index 0000000..30646ae --- /dev/null +++ b/internal/safefs/safefs_test.go @@ -0,0 +1,74 @@ +package safefs + +import ( + "context" + "errors" + "os" + "syscall" + "testing" + "time" +) + +func TestStat_ReturnsTimeoutError(t *testing.T) { + prev := osStat + defer func() { osStat = prev }() + + osStat = func(string) (os.FileInfo, error) { + select {} + } + + start := time.Now() + _, err := Stat(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("Stat err = %v; want timeout", err) + } + if 
time.Since(start) > 250*time.Millisecond { + t.Fatalf("Stat took too long: %s", time.Since(start)) + } +} + +func TestReadDir_ReturnsTimeoutError(t *testing.T) { + prev := osReadDir + defer func() { osReadDir = prev }() + + osReadDir = func(string) ([]os.DirEntry, error) { + select {} + } + + start := time.Now() + _, err := ReadDir(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("ReadDir err = %v; want timeout", err) + } + if time.Since(start) > 250*time.Millisecond { + t.Fatalf("ReadDir took too long: %s", time.Since(start)) + } +} + +func TestStatfs_ReturnsTimeoutError(t *testing.T) { + prev := syscallStatfs + defer func() { syscallStatfs = prev }() + + syscallStatfs = func(string, *syscall.Statfs_t) error { + select {} + } + + start := time.Now() + _, err := Statfs(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("Statfs err = %v; want timeout", err) + } + if time.Since(start) > 250*time.Millisecond { + t.Fatalf("Statfs took too long: %s", time.Since(start)) + } +} + +func TestStat_PropagatesContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := Stat(ctx, "/does/not/matter", 50*time.Millisecond) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Stat err = %v; want context.Canceled", err) + } +} From 05f0fb135d305bf1397ad696ff129f78a7c296b5 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Fri, 20 Feb 2026 16:39:31 +0100 Subject: [PATCH 23/24] Replace PXAR sampling with bounded sampler Remove the legacy PXAR sampling implementation and related privilege-sensitive helpers/tests, and introduce a bounded filesystem sampler. Details: - Added fs_sampling_bounded.go and its test to provide a new bounded sampling mechanism. - Removed large legacy PXAR sampling code: sampleDirectories, sampleFiles, computePxarWorkerRoots and many helper functions/types (deterministic shuffling, root selector, hashing, uniquePaths, etc.). - Cleaned up Collector and CollectorConfig by removing obsolete fields (PxarIntraConcurrency, PxarScanFanoutLevel, PxarScanMaxRoots, PxarStopOnCap, PxarEnumWorkers, PxarEnumBudgetMs) and related cache/mutex fields (rootsCache, rootsMu). - Deleted privilege-sensitive detection and its tests (collector_privilege_sensitive.*). - Updated tests and usages to stop referencing removed config fields and adjust debug logging messages. - Minor fixes/formatting: adjusted ownership check bracing in runtime_helpers.go, small struct/tag spacing changes, and other tidy-ups. This refactor simplifies the collector by replacing the complex fanout-based sampling and privileged heuristics with a bounded sampling approach and removes deprecated code paths. 
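For illustration, the core idea behind the bounded sampler is an iterative walk with hard caps on depth and result count, checking the context between directory reads instead of fanning work out to goroutines. The sketch below is an assumption-laden approximation and not the actual fs_sampling_bounded.go code: the package name, the SampleDirectories signature, and the skip-on-read-error policy are placeholders chosen for the example, and it relies only on the standard library.

    // Package sample is a hypothetical home for this sketch; the real code
    // lives in internal/backup/fs_sampling_bounded.go and may differ.
    package sample

    import (
    	"context"
    	"os"
    	"path/filepath"
    	"strings"
    )

    // SampleDirectories returns at most limit directory paths under root
    // (relative, slash-separated), never descending past maxDepth levels.
    // The context is checked between directory reads so callers can abort.
    func SampleDirectories(ctx context.Context, root string, maxDepth, limit int) ([]string, error) {
    	results := make([]string, 0, limit)
    	if limit <= 0 || maxDepth <= 0 {
    		return results, nil
    	}

    	root = filepath.Clean(root)
    	stack := []string{root}

    	for len(stack) > 0 && len(results) < limit {
    		if err := ctx.Err(); err != nil {
    			return results, err
    		}

    		dir := stack[len(stack)-1]
    		stack = stack[:len(stack)-1]

    		entries, err := os.ReadDir(dir)
    		if err != nil {
    			continue // skip unreadable directories rather than failing the whole sample
    		}

    		for _, entry := range entries {
    			if !entry.IsDir() {
    				continue
    			}
    			child := filepath.Join(dir, entry.Name())
    			rel, relErr := filepath.Rel(root, child)
    			if relErr != nil {
    				continue
    			}
    			rel = filepath.ToSlash(rel)
    			depth := strings.Count(rel, "/") // 0 for direct children of root

    			results = append(results, rel)
    			if len(results) >= limit {
    				break
    			}
    			// Only descend while the children would still be within maxDepth.
    			if depth < maxDepth-1 {
    				stack = append(stack, child)
    			}
    		}
    	}

    	return results, nil
    }

In this sketch every os.ReadDir call targets either root or a directory that was already selected, so the walk performs at most limit+1 directory reads regardless of how large the datastore tree is; that kind of predictable upper bound is what a bounded sampler aims for, under the stated assumptions.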
--- cmd/proxsave/runtime_helpers.go | 12 +- internal/backup/chunking_hardening_test.go | 192 ---- internal/backup/collector.go | 863 +----------------- .../backup/collector_config_extra_test.go | 13 +- internal/backup/collector_paths.go | 1 - .../collector_pbs_commands_coverage_test.go | 3 - internal/backup/collector_pbs_datastore.go | 6 +- .../collector_pbs_datastore_inventory.go | 10 +- .../collector_pbs_notifications_summary.go | 14 +- .../backup/collector_privilege_sensitive.go | 173 ---- .../collector_privilege_sensitive_test.go | 146 --- internal/backup/collector_pve.go | 137 --- internal/backup/collector_pxar_roots_test.go | 43 - internal/backup/collector_pxar_test.go | 729 --------------- internal/backup/fs_sampling_bounded.go | 148 +++ internal/backup/fs_sampling_bounded_test.go | 137 +++ .../backup/optimizations_structured_test.go | 16 +- internal/config/config.go | 12 - internal/config/templates/backup.env | 10 +- internal/identity/identity_test.go | 16 +- internal/orchestrator/artifact_guard_test.go | 33 - internal/orchestrator/backup_safety_test.go | 8 +- internal/orchestrator/chunking_paths.go | 28 - .../decrypt_tui_simulation_test.go | 1 - internal/orchestrator/orchestrator.go | 16 - internal/orchestrator/orchestrator_test.go | 17 +- .../pbs_notifications_api_apply_test.go | 1 - .../pbs_service_restart_order_test.go | 75 -- .../orchestrator/pve_safe_apply_mappings.go | 1 - .../orchestrator/pve_staged_apply_test.go | 2 +- .../orchestrator/restore_access_control_ui.go | 11 +- .../restore_chunking_selective_test.go | 112 --- internal/orchestrator/restore_filesystem.go | 10 +- internal/orchestrator/restore_ha_test.go | 1 - internal/orchestrator/restore_sdn_test.go | 1 - .../restore_tui_simulation_test.go | 1 - .../restore_workflow_abort_test.go | 8 +- .../restore_workflow_ui_tfa_test.go | 1 - internal/orchestrator/staging_test.go | 118 --- internal/orchestrator/tui_hooks.go | 1 - internal/orchestrator/tui_simulation_test.go | 24 +- internal/security/security.go | 50 +- internal/security/security_test.go | 34 +- internal/support/support_test.go | 12 +- 44 files changed, 419 insertions(+), 2828 deletions(-) delete mode 100644 internal/backup/chunking_hardening_test.go delete mode 100644 internal/backup/collector_privilege_sensitive.go delete mode 100644 internal/backup/collector_privilege_sensitive_test.go delete mode 100644 internal/backup/collector_pxar_roots_test.go delete mode 100644 internal/backup/collector_pxar_test.go create mode 100644 internal/backup/fs_sampling_bounded.go create mode 100644 internal/backup/fs_sampling_bounded_test.go delete mode 100644 internal/orchestrator/artifact_guard_test.go delete mode 100644 internal/orchestrator/chunking_paths.go delete mode 100644 internal/orchestrator/pbs_service_restart_order_test.go delete mode 100644 internal/orchestrator/restore_chunking_selective_test.go delete mode 100644 internal/orchestrator/staging_test.go diff --git a/cmd/proxsave/runtime_helpers.go b/cmd/proxsave/runtime_helpers.go index 5312a73..b95d90e 100644 --- a/cmd/proxsave/runtime_helpers.go +++ b/cmd/proxsave/runtime_helpers.go @@ -273,13 +273,13 @@ func detectFilesystemInfo(ctx context.Context, backend storage.Storage, path str return nil, nil } - if !fsInfo.SupportsOwnership { - if backend != nil && backend.Location() == storage.LocationCloud { - logger.Debug("%s [%s] does not support ownership changes (cloud remote); chown/chmod already disabled", path, fsInfo.Type) - } else { - logger.Info("%s [%s] does not support ownership changes; chown/chmod will be 
skipped", path, fsInfo.Type) - } + if !fsInfo.SupportsOwnership { + if backend != nil && backend.Location() == storage.LocationCloud { + logger.Debug("%s [%s] does not support ownership changes (cloud remote); chown/chmod already disabled", path, fsInfo.Type) + } else { + logger.Info("%s [%s] does not support ownership changes; chown/chmod will be skipped", path, fsInfo.Type) } + } return fsInfo, nil } diff --git a/internal/backup/chunking_hardening_test.go b/internal/backup/chunking_hardening_test.go deleted file mode 100644 index 1f0a33c..0000000 --- a/internal/backup/chunking_hardening_test.go +++ /dev/null @@ -1,192 +0,0 @@ -package backup - -import ( - "bytes" - "context" - "encoding/json" - "os" - "path/filepath" - "testing" - "time" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" -) - -func TestDiscoverChunksSortsNumerically(t *testing.T) { - root := t.TempDir() - base := filepath.Join(root, "chunked_files", "big.bin") - if err := os.MkdirAll(filepath.Dir(base), 0o755); err != nil { - t.Fatal(err) - } - - // Create chunk files in mixed order (including >999) to ensure numeric sort. - chunkPaths := []string{ - filepath.Join(filepath.Dir(base), "big.bin.010.chunk"), - filepath.Join(filepath.Dir(base), "big.bin.001.chunk"), - filepath.Join(filepath.Dir(base), "big.bin.1000.chunk"), - filepath.Join(filepath.Dir(base), "big.bin.002.chunk"), - filepath.Join(filepath.Dir(base), "big.bin.999.chunk"), - filepath.Join(filepath.Dir(base), "big.bin.003.chunk"), - } - for _, p := range chunkPaths { - if err := os.WriteFile(p, []byte("x"), 0o640); err != nil { - t.Fatalf("write %s: %v", p, err) - } - } - - chunks, err := discoverChunks(base) - if err != nil { - t.Fatalf("discoverChunks: %v", err) - } - - got := make([]int, 0, len(chunks)) - for _, c := range chunks { - got = append(got, c.Index) - } - want := []int{1, 2, 3, 10, 999, 1000} - if len(got) != len(want) { - t.Fatalf("got %d chunks, want %d (%v)", len(got), len(want), got) - } - for i := range want { - if got[i] != want[i] { - t.Fatalf("numeric sort mismatch: got %v want %v", got, want) - } - } -} - -func TestReassembleChunkedFiles_SkipsWhenLastChunkMissing(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - - originalPath := filepath.Join(root, "file.bin") - markerPath := originalPath + ".chunked" - chunkDir := filepath.Join(root, "chunked_files") - if err := os.MkdirAll(chunkDir, 0o755); err != nil { - t.Fatal(err) - } - - meta := chunkedFileMetadata{ - Version: 1, - SizeBytes: 9, - ChunkSizeBytes: 4, - ChunkCount: 3, // expects 3 chunks - Mode: 0o640, - UID: -1, - GID: -1, - ModTimeUnixNano: time.Now().UnixNano(), - } - payload, _ := json.Marshal(meta) - if err := os.WriteFile(markerPath, payload, 0o640); err != nil { - t.Fatal(err) - } - - // Only two chunks present -> should not reassemble. 
- if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.001.chunk"), []byte("abcd"), 0o640); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.002.chunk"), []byte("efgh"), 0o640); err != nil { - t.Fatal(err) - } - - if err := ReassembleChunkedFiles(logger, root); err != nil { - t.Fatalf("ReassembleChunkedFiles: %v", err) - } - - if _, err := os.Stat(originalPath); !os.IsNotExist(err) { - t.Fatalf("expected original not to be created, stat err=%v", err) - } - if _, err := os.Stat(markerPath); err != nil { - t.Fatalf("expected marker to remain, stat err=%v", err) - } -} - -func TestReassembleChunkedFiles_SkipsWhenSHA256Mismatch(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - - originalPath := filepath.Join(root, "file.bin") - markerPath := originalPath + ".chunked" - chunkDir := filepath.Join(root, "chunked_files") - if err := os.MkdirAll(chunkDir, 0o755); err != nil { - t.Fatal(err) - } - - data := []byte("hello world") - meta := chunkedFileMetadata{ - Version: 1, - SizeBytes: int64(len(data)), - ChunkSizeBytes: 8, - ChunkCount: 2, - SHA256: "0000000000000000000000000000000000000000000000000000000000000000", // wrong - Mode: 0o640, - UID: -1, - GID: -1, - ModTimeUnixNano: time.Now().UnixNano(), - } - payload, _ := json.Marshal(meta) - if err := os.WriteFile(markerPath, payload, 0o640); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.001.chunk"), data[:8], 0o640); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filepath.Join(chunkDir, "file.bin.002.chunk"), data[8:], 0o640); err != nil { - t.Fatal(err) - } - - if err := ReassembleChunkedFiles(logger, root); err != nil { - t.Fatalf("ReassembleChunkedFiles: %v", err) - } - - if _, err := os.Stat(originalPath); !os.IsNotExist(err) { - t.Fatalf("expected original not to be created, stat err=%v", err) - } - if _, err := os.Stat(markerPath); err != nil { - t.Fatalf("expected marker to remain, stat err=%v", err) - } -} - -func TestChunkAndReassemble_PreservesModeAndMtime(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - - original := bytes.Repeat([]byte("ABCDEFGHIJKLMNOP"), 6) // 96 bytes - target := filepath.Join(root, "subdir", "large.bin") - if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(target, original, 0o600); err != nil { - t.Fatal(err) - } - mt := time.Unix(1700000000, 123456789) - if err := os.Chtimes(target, mt, mt); err != nil { - t.Fatal(err) - } - - if err := chunkLargeFiles(context.Background(), logger, root, 16, 64); err != nil { - t.Fatalf("chunkLargeFiles: %v", err) - } - if err := ReassembleChunkedFiles(logger, root); err != nil { - t.Fatalf("ReassembleChunkedFiles: %v", err) - } - - got, err := os.ReadFile(target) - if err != nil { - t.Fatalf("read reassembled: %v", err) - } - if !bytes.Equal(got, original) { - t.Fatalf("content mismatch: got %d bytes want %d bytes", len(got), len(original)) - } - - info, err := os.Stat(target) - if err != nil { - t.Fatalf("stat reassembled: %v", err) - } - if info.Mode().Perm() != 0o600 { - t.Fatalf("mode mismatch: got %o want %o", info.Mode().Perm(), 0o600) - } - if info.ModTime().UnixNano() != mt.UnixNano() { - t.Fatalf("mtime mismatch: got %d want %d", info.ModTime().UnixNano(), mt.UnixNano()) - } -} diff --git a/internal/backup/collector.go b/internal/backup/collector.go index 4704252..925d298 100644 --- a/internal/backup/collector.go +++ 
b/internal/backup/collector.go @@ -5,10 +5,7 @@ import ( "context" "errors" "fmt" - "hash/fnv" "io" - "io/fs" - "math/rand" "os" "os/exec" "path/filepath" @@ -43,16 +40,14 @@ type FileSummary struct { // Collector handles backup data collection type Collector struct { - logger *logging.Logger - config *CollectorConfig - stats *CollectionStats - statsMu sync.Mutex - tempDir string - proxType types.ProxmoxType - dryRun bool - rootsMu sync.RWMutex - rootsCache map[string][]string - deps CollectorDeps + logger *logging.Logger + config *CollectorConfig + stats *CollectionStats + statsMu sync.Mutex + tempDir string + proxType types.ProxmoxType + dryRun bool + deps CollectorDeps // clusteredPVE records whether cluster mode was detected during PVE collection. clusteredPVE bool @@ -197,12 +192,6 @@ type CollectorConfig struct { // PXAR scanning tuning PxarDatastoreConcurrency int - PxarIntraConcurrency int - PxarScanFanoutLevel int - PxarScanMaxRoots int - PxarStopOnCap bool - PxarEnumWorkers int - PxarEnumBudgetMs int PxarFileIncludePatterns []string PxarFileExcludePatterns []string @@ -271,21 +260,6 @@ func (c *CollectorConfig) Validate() error { if c.PxarDatastoreConcurrency <= 0 { c.PxarDatastoreConcurrency = 3 } - if c.PxarIntraConcurrency <= 0 { - c.PxarIntraConcurrency = 4 - } - if c.PxarScanFanoutLevel <= 0 { - c.PxarScanFanoutLevel = 1 - } - if c.PxarScanMaxRoots <= 0 { - c.PxarScanMaxRoots = 2048 - } - if c.PxarEnumWorkers <= 0 { - c.PxarEnumWorkers = 4 - } - if c.PxarEnumBudgetMs < 0 { - c.PxarEnumBudgetMs = 0 - } if c.MaxPVEBackupSizeBytes < 0 { return fmt.Errorf("MAX_PVE_BACKUP_SIZE must be >= 0") } @@ -310,14 +284,13 @@ func NewCollector(logger *logging.Logger, config *CollectorConfig, tempDir strin // NewCollectorWithDeps creates a collector with explicit dependency overrides (for testing). 
func NewCollectorWithDeps(logger *logging.Logger, config *CollectorConfig, tempDir string, proxType types.ProxmoxType, dryRun bool, deps CollectorDeps) *Collector { return &Collector{ - logger: logger, - config: config, - stats: &CollectionStats{}, - tempDir: tempDir, - proxType: proxType, - dryRun: dryRun, - rootsCache: make(map[string][]string), - deps: deps, + logger: logger, + config: config, + stats: &CollectionStats{}, + tempDir: tempDir, + proxType: proxType, + dryRun: dryRun, + deps: deps, } } @@ -382,11 +355,6 @@ func GetDefaultCollectorConfig() *CollectorConfig { SystemRootPrefix: "", PxarDatastoreConcurrency: 3, - PxarIntraConcurrency: 4, - PxarScanFanoutLevel: 2, - PxarScanMaxRoots: 2048, - PxarEnumWorkers: 4, - PxarEnumBudgetMs: 0, PxarFileIncludePatterns: nil, PxarFileExcludePatterns: nil, @@ -1352,807 +1320,6 @@ func (c *Collector) collectCommandOptional(ctx context.Context, cmd, output, des } } -func (c *Collector) sampleDirectories(ctx context.Context, root string, maxDepth, limit int) ([]string, error) { - results := make([]string, 0, limit) - if limit <= 0 { - return results, nil - } - - startDirs, err := c.computePxarWorkerRoots(ctx, root, "directories") - if err != nil { - return results, err - } - - if len(startDirs) == 0 { - c.logger.Debug("PXAR sampleDirectories: root=%s completed (selected=0 visited=0 duration=0s)", root) - return results, nil - } - - stopErr := errors.New("directory sample limit reached") - start := time.Now() - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - workerLimit := c.config.PxarIntraConcurrency - if workerLimit <= 0 { - workerLimit = 1 - } - - var ( - wg sync.WaitGroup - sem = make(chan struct{}, workerLimit) - resMu sync.Mutex - progressMu sync.Mutex - errMu sync.Mutex - visited int - lastLog = start - firstErr error - limitReached bool - ) - - appendResult := func(rel string) (bool, bool) { - resMu.Lock() - defer resMu.Unlock() - if limitReached { - return false, true - } - results = append(results, filepath.ToSlash(rel)) - if len(results) >= limit { - limitReached = true - cancel() - return true, true - } - return true, false - } - - logProgress := func() { - progressMu.Lock() - defer progressMu.Unlock() - visited++ - if time.Since(lastLog) > 2*time.Second { - resMu.Lock() - selected := len(results) - resMu.Unlock() - c.logger.Debug("PXAR sampleDirectories: root=%s visited=%d selected=%d", root, visited, selected) - lastLog = time.Now() - } - } - - recordError := func(err error) { - errMu.Lock() - if firstErr == nil { - firstErr = err - cancel() - } - errMu.Unlock() - } - - for _, startPath := range startDirs { - if err := ctx.Err(); err != nil { - break - } - wg.Add(1) - sem <- struct{}{} - go func(startDir string) { - defer func() { - <-sem - wg.Done() - }() - - walkErr := filepath.WalkDir(startDir, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - - if err := ctx.Err(); err != nil { - return err - } - - if path == root { - return nil - } - - if c.shouldExclude(path) { - if d.IsDir() { - return filepath.SkipDir - } - return nil - } - - rel, relErr := filepath.Rel(root, path) - if relErr != nil { - return relErr - } - - if d.IsDir() { - logProgress() - depth := strings.Count(rel, string(filepath.Separator)) - if depth >= maxDepth { - return filepath.SkipDir - } - if _, hitLimit := appendResult(rel); hitLimit { - return stopErr - } - } - return nil - }) - - if walkErr != nil && !errors.Is(walkErr, stopErr) && !errors.Is(walkErr, context.Canceled) { - recordError(walkErr) - 
} - }(startPath) - } - - wg.Wait() - - if firstErr != nil { - return results, firstErr - } - resMu.Lock() - limitWasReached := limitReached - resMu.Unlock() - - if err := ctx.Err(); err != nil && !errors.Is(err, context.Canceled) && !limitWasReached { - return results, err - } - - resMu.Lock() - selected := len(results) - resMu.Unlock() - - progressMu.Lock() - totalVisited := visited - progressMu.Unlock() - - c.logger.Debug("PXAR sampleDirectories: root=%s completed (selected=%d visited=%d duration=%s)", - root, selected, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil -} - -func (c *Collector) sampleFiles(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int) ([]FileSummary, error) { - results := make([]FileSummary, 0, limit) - if limit <= 0 { - return results, nil - } - - entries, err := os.ReadDir(root) - if err != nil { - return results, err - } - - stopErr := errors.New("file sample limit reached") - start := time.Now() - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - workerLimit := c.config.PxarIntraConcurrency - if workerLimit <= 0 { - workerLimit = 1 - } - - var ( - wg sync.WaitGroup - sem = make(chan struct{}, workerLimit) - resMu sync.Mutex - progressMu sync.Mutex - errMu sync.Mutex - visited int - matched int - lastLog = start - firstErr error - limitReached bool - ) - - appendResult := func(summary FileSummary) (bool, bool) { - resMu.Lock() - defer resMu.Unlock() - if limitReached { - return false, true - } - results = append(results, summary) - if len(results) >= limit { - limitReached = true - cancel() - return true, true - } - return true, false - } - - logProgress := func() { - progressMu.Lock() - defer progressMu.Unlock() - visited++ - if time.Since(lastLog) > 2*time.Second { - resMu.Lock() - selected := len(results) - resMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s visited=%d matched=%d selected=%d", root, visited, matched, selected) - lastLog = time.Now() - } - } - - incMatched := func() { - progressMu.Lock() - matched++ - progressMu.Unlock() - } - - recordError := func(err error) { - errMu.Lock() - if firstErr == nil { - firstErr = err - cancel() - } - errMu.Unlock() - } - - processFile := func(path string, info fs.FileInfo) error { - if c.shouldExclude(path) { - return nil - } - rel, err := filepath.Rel(root, path) - if err != nil { - return err - } - logProgress() - - if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, filepath.Base(path), rel) { - return nil - } - if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, filepath.Base(path), rel) { - return nil - } - incMatched() - - summary := FileSummary{ - RelativePath: filepath.ToSlash(rel), - SizeBytes: info.Size(), - SizeHuman: FormatBytes(info.Size()), - ModTime: info.ModTime(), - } - if _, hitLimit := appendResult(summary); hitLimit { - return stopErr - } - return nil - } - - limitTriggered := false - - for _, entry := range entries { - path := filepath.Join(root, entry.Name()) - if entry.IsDir() { - continue - } - - info, infoErr := entry.Info() - if infoErr != nil { - continue - } - if err := processFile(path, info); err != nil { - if errors.Is(err, stopErr) { - limitTriggered = true - break - } - return results, err - } - } - - if limitTriggered { - resMu.Lock() - selected := len(results) - resMu.Unlock() - progressMu.Lock() - totalVisited := visited - totalMatched := matched - progressMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d 
duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil - } - - startDirs, err := c.computePxarWorkerRoots(ctx, root, "files") - if err != nil { - return results, err - } - - if len(startDirs) == 0 { - resMu.Lock() - selected := len(results) - resMu.Unlock() - progressMu.Lock() - totalVisited := visited - totalMatched := matched - progressMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil - } - - for _, startPath := range startDirs { - if err := ctx.Err(); err != nil { - break - } - wg.Add(1) - sem <- struct{}{} - go func(startDir string) { - defer func() { - <-sem - wg.Done() - }() - - walkErr := filepath.WalkDir(startDir, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - - if err := ctx.Err(); err != nil { - return err - } - - if d.IsDir() { - if c.shouldExclude(path) { - return filepath.SkipDir - } - rel, relErr := filepath.Rel(root, path) - if relErr != nil { - return relErr - } - depth := strings.Count(rel, string(filepath.Separator)) - if depth >= maxDepth { - return filepath.SkipDir - } - return nil - } - - info, infoErr := d.Info() - if infoErr != nil { - return nil - } - return processFile(path, info) - }) - - if walkErr != nil && !errors.Is(walkErr, stopErr) && !errors.Is(walkErr, context.Canceled) { - recordError(walkErr) - } - }(startPath) - } - - wg.Wait() - - if firstErr != nil { - return results, firstErr - } - - resMu.Lock() - limitWasReached := limitReached - selected := len(results) - resMu.Unlock() - - if err := ctx.Err(); err != nil && !errors.Is(err, context.Canceled) && !limitWasReached { - return results, err - } - - progressMu.Lock() - totalVisited := visited - totalMatched := matched - progressMu.Unlock() - - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil -} - -func (c *Collector) computePxarWorkerRoots(ctx context.Context, root, purpose string) ([]string, error) { - cacheKey := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsMu.RLock() - if cached, ok := c.rootsCache[cacheKey]; ok && len(cached) > 0 { - result := append([]string(nil), cached...) 
- c.rootsMu.RUnlock() - c.logger.Debug("PXAR worker roots cache hit (%s): root=%s count=%d", purpose, root, len(result)) - return result, nil - } - c.rootsMu.RUnlock() - - fanout := c.config.PxarScanFanoutLevel - if fanout < 1 { - fanout = 1 - } - maxRoots := c.config.PxarScanMaxRoots - if maxRoots <= 0 { - maxRoots = 2048 - } - enumWorkers := c.config.PxarEnumWorkers - if enumWorkers <= 0 { - enumWorkers = 1 - } - budgetMs := c.config.PxarEnumBudgetMs - - baseCtx, baseCancel := context.WithCancel(ctx) - defer baseCancel() - ctxFanout := baseCtx - if budgetMs > 0 { - ctxBudget, cancel := context.WithTimeout(baseCtx, time.Duration(budgetMs)*time.Millisecond) - ctxFanout = ctxBudget - defer cancel() - } - - start := time.Now() - c.logger.Debug("PXAR fanout enumeration (%s): root=%s fanout=%d max_roots=%d workers=%d budget_ms=%d", - purpose, root, fanout, maxRoots, enumWorkers, budgetMs) - - levels := make(map[int][]string, fanout) - selector := newPxarRootSelector(maxRoots) - var selectorMu sync.Mutex - queue := []string{root} - var foundAny atomic.Bool - stopOnCap := c.config.PxarStopOnCap - - const ( - pxarStopReasonNone int32 = iota - pxarStopReasonCap - pxarStopReasonBudget - ) - var stopReason atomic.Int32 - - var progressVisited atomic.Int64 - var progressScanned atomic.Int64 - var progressExcluded atomic.Int64 - var progressLeaves atomic.Int64 - var progressReadErr atomic.Int64 - var progressDepth atomic.Int64 - var progressCandidates atomic.Int64 - var progressCapped atomic.Bool - - var progressStop chan struct{} - if c.logger.GetLevel() >= types.LogLevelDebug { - progressStop = make(chan struct{}) - ticker := time.NewTicker(5 * time.Second) - go func() { - defer ticker.Stop() - for { - select { - case <-ticker.C: - c.logger.Debug("PXAR progress (%s): depth=%d visited=%d scanned=%d excluded=%d leaves=%d candidates=%d capped=%v elapsed=%s", - purpose, - progressDepth.Load(), - progressVisited.Load(), - progressScanned.Load(), - progressExcluded.Load(), - progressLeaves.Load(), - progressCandidates.Load(), - progressCapped.Load(), - time.Since(start).Truncate(time.Millisecond)) - case <-progressStop: - return - case <-ctxFanout.Done(): - return - } - } - }() - defer close(progressStop) - } - -fanoutLoop: - for depth := 0; depth < fanout; depth++ { - if len(queue) == 0 { - break - } - if err := ctxFanout.Err(); err != nil { - break - } - - progressDepth.Store(int64(depth + 1)) - next := make([]string, 0, len(queue)) - var nextMu sync.Mutex - - jobCh := make(chan string) - var wg sync.WaitGroup - - workerCount := enumWorkers - if workerCount > len(queue) { - workerCount = len(queue) - } - if workerCount < 1 { - workerCount = 1 - } - - shuffledBases := append([]string(nil), queue...) 
- shuffleStringsDeterministic(shuffledBases, deterministicSeed(root, purpose, fmt.Sprintf("depth-%d", depth))) - - for w := 0; w < workerCount; w++ { - wg.Add(1) - go func() { - defer wg.Done() - for basePath := range jobCh { - if err := ctxFanout.Err(); err != nil { - return - } - - progressVisited.Add(1) - entries, err := os.ReadDir(basePath) - if err != nil { - progressReadErr.Add(1) - continue - } - progressScanned.Add(int64(len(entries))) - shuffleDirEntriesDeterministic(entries, deterministicSeed(basePath, purpose, fmt.Sprintf("depth-%d", depth))) - - for _, entry := range entries { - if err := ctxFanout.Err(); err != nil { - return - } - - if !entry.IsDir() { - continue - } - child := filepath.Join(basePath, entry.Name()) - if c.shouldExclude(child) { - progressExcluded.Add(1) - continue - } - foundAny.Store(true) - - level := depth + 1 - if level < fanout { - nextMu.Lock() - levels[level] = append(levels[level], child) - next = append(next, child) - nextMu.Unlock() - continue - } - - selectorMu.Lock() - prevCapped := selector.capped - selector.consider(child) - progressLeaves.Add(1) - progressCandidates.Store(int64(selector.total)) - currentCapped := selector.capped - selectorMu.Unlock() - - if !prevCapped && currentCapped { - progressCapped.Store(true) - c.logger.Debug("PXAR progress (%s): candidate cap reached (limit=%d) at total=%d", - purpose, maxRoots, selector.total) - if stopOnCap { - if stopReason.CompareAndSwap(pxarStopReasonNone, pxarStopReasonCap) { - c.logger.Debug("PXAR early termination (%s): stop_on_cap=true limit=%d candidates=%d depth=%d elapsed=%s", - purpose, - maxRoots, - selector.total, - depth+1, - time.Since(start).Truncate(time.Millisecond)) - } - baseCancel() - return - } - } - } - } - }() - } - - for _, base := range shuffledBases { - select { - case <-ctxFanout.Done(): - break fanoutLoop - default: - jobCh <- base - } - } - close(jobCh) - wg.Wait() - - if err := ctxFanout.Err(); err != nil { - break - } - - c.logger.Debug("PXAR depth %d/%d done: bases=%d next_bases=%d leaves=%d excluded=%d readErrs=%d elapsed=%s", - depth+1, - fanout, - len(queue), - len(next), - progressLeaves.Load(), - progressExcluded.Load(), - progressReadErr.Load(), - time.Since(start).Truncate(time.Millisecond)) - queue = next - } - - if budgetMs > 0 && errors.Is(ctxFanout.Err(), context.DeadlineExceeded) { - stopReason.CompareAndSwap(pxarStopReasonNone, pxarStopReasonBudget) - c.logger.Debug("PXAR early termination (%s): enumeration budget exceeded (%dms)", purpose, budgetMs) - } - - if !foundAny.Load() { - return nil, nil - } - - roots := selector.results() - capped := selector.capped - totalCandidates := selector.total - if len(roots) == 0 { - for level := fanout - 1; level >= 1; level-- { - if dirs := levels[level]; len(dirs) > 0 { - c.logger.Debug("PXAR fallback to level=%d: dirs=%d (limit=%d)", level, len(dirs), maxRoots) - roots = uniquePaths(dirs) - totalCandidates = len(dirs) - if maxRoots > 0 && len(roots) > maxRoots { - c.logger.Debug("PXAR downsample: from=%d to=%d", len(roots), maxRoots) - roots = downsampleRoots(roots, maxRoots) - capped = true - } - break - } - } - } - - if len(roots) == 0 { - return nil, nil - } - - c.logger.Debug("PXAR worker roots (%s): root=%s fanout=%d count=%d candidates=%d capped=%v duration=%s", - purpose, - root, - fanout, - len(roots), - totalCandidates, - capped, - time.Since(start).Truncate(time.Millisecond)) - c.rootsMu.Lock() - c.rootsCache[cacheKey] = append([]string(nil), roots...) 
- c.rootsMu.Unlock() - return roots, nil -} - -func downsampleRoots(roots []string, limit int) []string { - if limit <= 0 || len(roots) <= limit { - return roots - } - step := len(roots) / limit - if step <= 1 { - return roots[:limit] - } - result := make([]string, 0, limit) - for i := 0; i < len(roots) && len(result) < limit; i += step { - result = append(result, roots[i]) - } - if len(result) < limit { - for i := len(roots) - 1; i >= 0 && len(result) < limit; i-- { - result = append(result, roots[i]) - } - } - if len(result) > limit { - result = result[:limit] - } - return result -} - -func shuffleStringsDeterministic(items []string, seed int64) { - if len(items) <= 1 { - return - } - r := rand.New(rand.NewSource(seed)) - for i := len(items) - 1; i > 0; i-- { - j := r.Intn(i + 1) - items[i], items[j] = items[j], items[i] - } -} - -func shuffleDirEntriesDeterministic(entries []fs.DirEntry, seed int64) { - if len(entries) <= 1 { - return - } - r := rand.New(rand.NewSource(seed)) - for i := len(entries) - 1; i > 0; i-- { - j := r.Intn(i + 1) - entries[i], entries[j] = entries[j], entries[i] - } -} - -func deterministicSeed(parts ...string) int64 { - hasher := fnv.New64a() - for _, p := range parts { - _, _ = hasher.Write([]byte(p)) - _, _ = hasher.Write([]byte{0}) - } - return int64(hasher.Sum64()) -} - -type pxarRootCandidate struct { - path string - weight uint32 -} - -type pxarRootSelector struct { - limit int - items []pxarRootCandidate - total int - capped bool - maxIdx int - maxWeight uint32 -} - -func newPxarRootSelector(limit int) *pxarRootSelector { - return &pxarRootSelector{ - limit: limit, - maxIdx: -1, - } -} - -func (s *pxarRootSelector) consider(path string) { - s.total++ - if s.limit <= 0 { - s.items = append(s.items, pxarRootCandidate{path: path}) - return - } - weight := hashPath(path) - if len(s.items) < s.limit { - s.items = append(s.items, pxarRootCandidate{path: path, weight: weight}) - if weight > s.maxWeight || s.maxIdx == -1 { - s.maxWeight = weight - s.maxIdx = len(s.items) - 1 - } - return - } - s.capped = true - if weight >= s.maxWeight { - return - } - s.items[s.maxIdx] = pxarRootCandidate{path: path, weight: weight} - s.recomputeMax() -} - -func (s *pxarRootSelector) recomputeMax() { - if len(s.items) == 0 { - s.maxIdx = -1 - s.maxWeight = 0 - return - } - maxIdx := 0 - maxWeight := s.items[0].weight - for i := 1; i < len(s.items); i++ { - if s.items[i].weight > maxWeight { - maxWeight = s.items[i].weight - maxIdx = i - } - } - s.maxIdx = maxIdx - s.maxWeight = maxWeight -} - -func (s *pxarRootSelector) results() []string { - if len(s.items) == 0 { - return nil - } - roots := make([]string, len(s.items)) - for i, item := range s.items { - roots[i] = item.path - } - return uniquePaths(roots) -} - -func hashPath(path string) uint32 { - h := fnv.New32a() - _, _ = h.Write([]byte(path)) - return h.Sum32() -} - -func uniquePaths(paths []string) []string { - if len(paths) == 0 { - return paths - } - seen := make(map[string]struct{}, len(paths)) - unique := make([]string, 0, len(paths)) - for _, path := range paths { - if _, ok := seen[path]; ok { - continue - } - seen[path] = struct{}{} - unique = append(unique, path) - } - return unique -} - func matchAnyPattern(patterns []string, name, relative string) bool { if len(patterns) == 0 { return true diff --git a/internal/backup/collector_config_extra_test.go b/internal/backup/collector_config_extra_test.go index ae15aa4..e13f6cd 100644 --- a/internal/backup/collector_config_extra_test.go +++ 
b/internal/backup/collector_config_extra_test.go @@ -28,7 +28,7 @@ func TestCollectorConfigValidateDefaultsAndErrors(t *testing.T) { if err := cfg.Validate(); err != nil { t.Fatalf("unexpected error for minimal valid config: %v", err) } - if cfg.PxarDatastoreConcurrency != 3 || cfg.PxarIntraConcurrency != 4 || cfg.PxarScanFanoutLevel != 1 || cfg.PxarScanMaxRoots != 2048 || cfg.PxarEnumWorkers != 4 { + if cfg.PxarDatastoreConcurrency != 3 { t.Fatalf("defaults not applied correctly: %+v", cfg) } } @@ -69,17 +69,6 @@ func TestCollectorConfigValidateEmptyExcludePattern(t *testing.T) { } } -func TestCollectorConfigValidateNormalizesNegativeBudget(t *testing.T) { - cfg := &CollectorConfig{BackupVMConfigs: true} - cfg.PxarEnumBudgetMs = -1 - if err := cfg.Validate(); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if cfg.PxarEnumBudgetMs != 0 { - t.Fatalf("expected PxarEnumBudgetMs to be normalized to 0, got %d", cfg.PxarEnumBudgetMs) - } -} - func TestCollectorConfigValidateRequiresAbsoluteSystemRootPrefix(t *testing.T) { cfg := &CollectorConfig{BackupVMConfigs: true} cfg.SystemRootPrefix = "relative/path" diff --git a/internal/backup/collector_paths.go b/internal/backup/collector_paths.go index 7b39b64..83c6ec4 100644 --- a/internal/backup/collector_paths.go +++ b/internal/backup/collector_paths.go @@ -20,4 +20,3 @@ func (c *Collector) proxsaveCommandsDir(component string) string { func (c *Collector) proxsaveRuntimeDir(component string) string { return c.proxsaveInfoDir("runtime", component) } - diff --git a/internal/backup/collector_pbs_commands_coverage_test.go b/internal/backup/collector_pbs_commands_coverage_test.go index 11011a5..b96c44f 100644 --- a/internal/backup/collector_pbs_commands_coverage_test.go +++ b/internal/backup/collector_pbs_commands_coverage_test.go @@ -119,7 +119,6 @@ func TestCollectPBSPxarMetadataProcessesMultipleDatastores(t *testing.T) { tmp := t.TempDir() cfg := GetDefaultCollectorConfig() cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 collector := NewCollector(newTestLogger(), cfg, tmp, types.ProxmoxBS, false) @@ -322,7 +321,6 @@ func TestCollectPBSConfigsEndToEndWithStubs(t *testing.T) { cfg := GetDefaultCollectorConfig() cfg.PBSConfigPath = pbsRoot cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 deps := CollectorDeps{ LookPath: func(name string) (string, error) { return "/bin/" + name, nil }, @@ -402,7 +400,6 @@ func TestCollectPBSPxarMetadataStopsOnFirstDatastoreError(t *testing.T) { tmp := t.TempDir() cfg := GetDefaultCollectorConfig() cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 collector := NewCollector(newTestLogger(), cfg, tmp, types.ProxmoxBS, false) diff --git a/internal/backup/collector_pbs_datastore.go b/internal/backup/collector_pbs_datastore.go index 77f427e..0b5c2ba 100644 --- a/internal/backup/collector_pbs_datastore.go +++ b/internal/backup/collector_pbs_datastore.go @@ -107,15 +107,11 @@ func (c *Collector) collectPBSPxarMetadata(ctx context.Context, datastores []pbs if dsWorkers <= 0 { dsWorkers = 1 } - intraWorkers := c.config.PxarIntraConcurrency - if intraWorkers <= 0 { - intraWorkers = 1 - } mode := "sequential" if dsWorkers > 1 { mode = fmt.Sprintf("parallel (%d workers)", dsWorkers) } - c.logger.Debug("PXAR metadata concurrency: datastores=%s, per-datastore workers=%d", mode, intraWorkers) + c.logger.Debug("PXAR metadata concurrency: datastores=%s", mode) pxarRoot := c.proxsaveInfoDir("pbs", "pxar") metaRoot := filepath.Join(pxarRoot, "metadata") diff --git 
a/internal/backup/collector_pbs_datastore_inventory.go b/internal/backup/collector_pbs_datastore_inventory.go index c549206..72bbb02 100644 --- a/internal/backup/collector_pbs_datastore_inventory.go +++ b/internal/backup/collector_pbs_datastore_inventory.go @@ -653,11 +653,11 @@ func extractFstabReferencedFiles(content string) []string { } keys := map[string]struct{}{ - "credentials": {}, - "cred": {}, - "passwd": {}, - "passfile": {}, - "keyfile": {}, + "credentials": {}, + "cred": {}, + "passwd": {}, + "passfile": {}, + "keyfile": {}, "identityfile": {}, } diff --git a/internal/backup/collector_pbs_notifications_summary.go b/internal/backup/collector_pbs_notifications_summary.go index 180e163..244cb8d 100644 --- a/internal/backup/collector_pbs_notifications_summary.go +++ b/internal/backup/collector_pbs_notifications_summary.go @@ -11,13 +11,13 @@ import ( ) type pbsNotificationSnapshotSummary struct { - Present bool `json:"present"` - Bytes int64 `json:"bytes,omitempty"` - Total int `json:"total,omitempty"` - BuiltIn int `json:"built_in,omitempty"` - Custom int `json:"custom,omitempty"` - Names []string `json:"names,omitempty"` - Error string `json:"error,omitempty"` + Present bool `json:"present"` + Bytes int64 `json:"bytes,omitempty"` + Total int `json:"total,omitempty"` + BuiltIn int `json:"built_in,omitempty"` + Custom int `json:"custom,omitempty"` + Names []string `json:"names,omitempty"` + Error string `json:"error,omitempty"` } type pbsNotificationsConfigFilesSummary struct { diff --git a/internal/backup/collector_privilege_sensitive.go b/internal/backup/collector_privilege_sensitive.go deleted file mode 100644 index 8138468..0000000 --- a/internal/backup/collector_privilege_sensitive.go +++ /dev/null @@ -1,173 +0,0 @@ -package backup - -import ( - "os" - "strconv" - "strings" -) - -const ( - uidMapPath = "/proc/self/uid_map" - gidMapPath = "/proc/self/gid_map" - systemdContainerPath = "/run/systemd/container" -) - -type unprivilegedContainerContext struct { - Detected bool - Details string -} - -func (c *Collector) depDetectUnprivilegedContainer() unprivilegedContainerContext { - if c == nil { - return unprivilegedContainerContext{} - } - if c.deps.DetectUnprivilegedContainer != nil { - ok, details := c.deps.DetectUnprivilegedContainer() - return unprivilegedContainerContext{Detected: ok, Details: strings.TrimSpace(details)} - } - ok, details := detectUnprivilegedContainer() - return unprivilegedContainerContext{Detected: ok, Details: details} -} - -// detectUnprivilegedContainer attempts to determine whether ProxSave is running in an -// "unprivileged container"-like context where low-level hardware/block access is typically -// restricted. -// -// Implementation note: -// - We primarily rely on user-namespace UID/GID maps. When UID/GID 0 inside maps to a -// non-zero host ID, we treat it as "unprivileged" (common for LXC unprivileged containers). -// - Container flavor is best-effort via /run/systemd/container (if present). -// -// The detection is intentionally conservative in what it changes: it is only used to -// downgrade *known privilege-sensitive command failures* from WARNING to SKIP. 
-func detectUnprivilegedContainer() (bool, string) { - uidShifted, uidHost := parseRootIDMapShift(readSmallFile(uidMapPath)) - gidShifted, gidHost := parseRootIDMapShift(readSmallFile(gidMapPath)) - if !uidShifted && !gidShifted { - return false, "" - } - - var parts []string - if uidShifted { - parts = append(parts, "uid_map=0->"+strconv.FormatUint(uidHost, 10)) - } - if gidShifted { - parts = append(parts, "gid_map=0->"+strconv.FormatUint(gidHost, 10)) - } - - if container := strings.TrimSpace(readSmallFile(systemdContainerPath)); container != "" { - parts = append(parts, "container="+container) - } - - return true, strings.Join(parts, " ") -} - -func readSmallFile(path string) string { - data, err := os.ReadFile(path) - if err != nil || len(data) == 0 { - return "" - } - // Avoid leaking NUL-separated content (e.g., /proc/*/environ). - return strings.ReplaceAll(string(data), "\x00", " ") -} - -// parseRootIDMapShift checks whether the mapping for UID/GID 0 is shifted (i.e., maps to a -// non-zero host ID). Returns (true, hostStart) when shifted. -func parseRootIDMapShift(content string) (bool, uint64) { - content = strings.TrimSpace(content) - if content == "" { - return false, 0 - } - for _, line := range strings.Split(content, "\n") { - fields := strings.Fields(strings.TrimSpace(line)) - if len(fields) < 3 { - continue - } - insideStart, err1 := strconv.ParseUint(fields[0], 10, 64) - hostStart, err2 := strconv.ParseUint(fields[1], 10, 64) - length, err3 := strconv.ParseUint(fields[2], 10, 64) - if err1 != nil || err2 != nil || err3 != nil { - continue - } - if length == 0 { - continue - } - // We only care about the range that covers "root" inside the namespace (UID/GID 0). - if insideStart == 0 { - if hostStart == 0 { - return false, 0 - } - return true, hostStart - } - } - return false, 0 -} - -func isPrivilegeSensitiveFailureCandidate(command string) bool { - switch command { - case "dmidecode", "blkid", "sensors", "smartctl": - return true - default: - return false - } -} - -func privilegeSensitiveFailureReason(command string, exitCode int, outputText string) string { - command = strings.TrimSpace(command) - if command == "" { - return "" - } - if !isPrivilegeSensitiveFailureCandidate(command) { - return "" - } - - lower := strings.ToLower(strings.TrimSpace(outputText)) - hasPerm := containsAny(lower, - "permission denied", - "operation not permitted", - "not permitted", - "access denied", - ) - - switch command { - case "dmidecode": - // dmidecode typically fails due to restricted access to DMI tables (/sys/firmware/dmi or /dev/mem). - if hasPerm || strings.Contains(lower, "/dev/mem") || strings.Contains(lower, "/sys/firmware/dmi") { - return "DMI tables not accessible" - } - case "blkid": - // In unprivileged LXC, blkid often exits 2 with empty output when block devices are not accessible. - if exitCode == 2 && lower == "" { - return "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited" - } - if hasPerm { - return "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited" - } - case "sensors": - // "No sensors found!" is common in virtualized/containerized environments. 
- if strings.Contains(lower, "no sensors found") { - return "no hardware sensors available" - } - if hasPerm { - return "hardware sensors not accessible" - } - case "smartctl": - if hasPerm { - return "SMART devices not accessible" - } - } - - return "" -} - -func containsAny(haystack string, needles ...string) bool { - for _, needle := range needles { - if needle == "" { - continue - } - if strings.Contains(haystack, needle) { - return true - } - } - return false -} diff --git a/internal/backup/collector_privilege_sensitive_test.go b/internal/backup/collector_privilege_sensitive_test.go deleted file mode 100644 index ffb3ae1..0000000 --- a/internal/backup/collector_privilege_sensitive_test.go +++ /dev/null @@ -1,146 +0,0 @@ -package backup - -import ( - "bytes" - "context" - "os" - "os/exec" - "path/filepath" - "strings" - "testing" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" -) - -func TestParseRootIDMapShift(t *testing.T) { - t.Run("identity mapping", func(t *testing.T) { - shifted, host := parseRootIDMapShift("0 0 4294967295\n") - if shifted || host != 0 { - t.Fatalf("shifted=%v host=%d; want false,0", shifted, host) - } - }) - - t.Run("shifted mapping", func(t *testing.T) { - shifted, host := parseRootIDMapShift("0 100000 65536\n") - if !shifted || host != 100000 { - t.Fatalf("shifted=%v host=%d; want true,100000", shifted, host) - } - }) - - t.Run("missing root range", func(t *testing.T) { - shifted, host := parseRootIDMapShift("1 100000 65536\n") - if shifted || host != 0 { - t.Fatalf("shifted=%v host=%d; want false,0", shifted, host) - } - }) -} - -func TestPrivilegeSensitiveFailureReason(t *testing.T) { - cases := []struct { - name string - command string - exitCode int - output string - want string - }{ - {"dmidecode perm", "dmidecode", 1, "/dev/mem: Permission denied", "DMI tables not accessible"}, - {"blkid exit2 empty", "blkid", 2, "", "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited"}, - {"blkid perm", "blkid", 2, "Permission denied", "block devices not accessible; restore hint: automated fstab device remap (UUID/PARTUUID/LABEL) may be limited"}, - {"sensors none", "sensors", 1, "No sensors found!", "no hardware sensors available"}, - {"smartctl perm", "smartctl", 1, "Permission denied", "SMART devices not accessible"}, - {"other ignored", "false", 1, "Permission denied", ""}, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - got := privilegeSensitiveFailureReason(tc.command, tc.exitCode, tc.output) - if got != tc.want { - t.Fatalf("reason=%q; want %q", got, tc.want) - } - }) - } -} - -func TestSafeCmdOutput_DowngradesPrivilegeSensitiveFailureToSkip(t *testing.T) { - logger := logging.New(types.LogLevelInfo, false) - buf := &bytes.Buffer{} - logger.SetOutput(buf) - - cfg := GetDefaultCollectorConfig() - tmp := t.TempDir() - - deps := CollectorDeps{ - LookPath: func(string) (string, error) { return "/usr/sbin/dmidecode", nil }, - RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { - cmd := exec.Command("sh", "-c", "echo '/dev/mem: Permission denied' >&2; exit 1") - out, err := cmd.CombinedOutput() - return out, err - }, - DetectUnprivilegedContainer: func() (bool, string) { - return true, "uid_map=0->100000 container=lxc" - }, - } - c := NewCollectorWithDeps(logger, cfg, tmp, types.ProxmoxUnknown, false, deps) - - outPath := filepath.Join(tmp, "dmidecode.txt") - if err := 
c.safeCmdOutput(context.Background(), "dmidecode", outPath, "Hardware DMI information", false); err != nil { - t.Fatalf("safeCmdOutput error: %v", err) - } - - logText := buf.String() - if !strings.Contains(logText, "SKIP") { - t.Fatalf("expected SKIP in logs, got: %s", logText) - } - if strings.Contains(logText, "WARNING") { - t.Fatalf("expected no WARNING in logs, got: %s", logText) - } - if _, err := os.Stat(outPath); !os.IsNotExist(err) { - t.Fatalf("expected no output file to be created, stat err=%v", err) - } -} - -func TestCaptureCommandOutput_DowngradesBlkidExit2ToSkipInUnprivilegedContainer(t *testing.T) { - logger := logging.New(types.LogLevelInfo, false) - buf := &bytes.Buffer{} - logger.SetOutput(buf) - - cfg := GetDefaultCollectorConfig() - tmp := t.TempDir() - - deps := CollectorDeps{ - LookPath: func(string) (string, error) { return "/sbin/blkid", nil }, - RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { - cmd := exec.Command("sh", "-c", "exit 2") - out, err := cmd.CombinedOutput() - return out, err - }, - DetectUnprivilegedContainer: func() (bool, string) { - return true, "uid_map=0->100000 container=lxc" - }, - } - c := NewCollectorWithDeps(logger, cfg, tmp, types.ProxmoxUnknown, false, deps) - - outPath := filepath.Join(tmp, "blkid.txt") - data, err := c.captureCommandOutput(context.Background(), "blkid", outPath, "Block device identifiers (blkid)", false) - if err != nil { - t.Fatalf("captureCommandOutput returned error: %v", err) - } - if data != nil { - t.Fatalf("expected nil data on non-critical failure, got %q", string(data)) - } - - logText := buf.String() - if !strings.Contains(logText, "SKIP") { - t.Fatalf("expected SKIP in logs, got: %s", logText) - } - if !strings.Contains(strings.ToLower(logText), "restore hint") { - t.Fatalf("expected restore hint in logs, got: %s", logText) - } - if strings.Contains(logText, "WARNING") { - t.Fatalf("expected no WARNING in logs, got: %s", logText) - } - if _, err := os.Stat(outPath); !os.IsNotExist(err) { - t.Fatalf("expected no output file to be created, stat err=%v", err) - } -} diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 766afa7..ed0a8a6 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -1919,143 +1919,6 @@ func (c *Collector) parseStorageConfigEntries() []pveStorageEntry { return entries } -func (c *Collector) sampleDirectoriesBounded(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { - results := make([]string, 0, limit) - if limit <= 0 || maxDepth <= 0 { - return results, nil - } - - root = filepath.Clean(root) - stack := []string{root} - - for len(stack) > 0 && len(results) < limit { - if err := ctx.Err(); err != nil { - return results, err - } - dirPath := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) - if err != nil { - return results, err - } - - for _, entry := range entries { - if err := ctx.Err(); err != nil { - return results, err - } - if !entry.IsDir() { - continue - } - child := filepath.Join(dirPath, entry.Name()) - if c.shouldExclude(child) { - continue - } - - rel, relErr := filepath.Rel(root, child) - if relErr != nil || rel == "." 
|| strings.HasPrefix(rel, "..") { - continue - } - rel = filepath.ToSlash(rel) - depth := strings.Count(rel, "/") - if depth >= maxDepth { - continue - } - - results = append(results, rel) - if len(results) >= limit { - break - } - if depth < maxDepth-1 { - stack = append(stack, child) - } - } - } - - return results, nil -} - -func (c *Collector) sampleFilesBounded(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int, ioTimeout time.Duration) ([]FileSummary, error) { - results := make([]FileSummary, 0, limit) - if limit <= 0 { - return results, nil - } - - root = filepath.Clean(root) - stack := []string{root} - - for len(stack) > 0 && len(results) < limit { - if err := ctx.Err(); err != nil { - return results, err - } - dirPath := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) - if err != nil { - return results, err - } - - for _, entry := range entries { - if err := ctx.Err(); err != nil { - return results, err - } - - name := entry.Name() - full := filepath.Join(dirPath, name) - if c.shouldExclude(full) { - continue - } - - if entry.IsDir() { - rel, relErr := filepath.Rel(root, full) - if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { - continue - } - rel = filepath.ToSlash(rel) - depth := strings.Count(rel, "/") - if depth >= maxDepth { - continue - } - stack = append(stack, full) - continue - } - - rel, relErr := filepath.Rel(root, full) - if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { - continue - } - - if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, name, rel) { - continue - } - if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, name, rel) { - continue - } - - info, err := safefs.Stat(ctx, full, ioTimeout) - if err != nil { - if errors.Is(err, safefs.ErrTimeout) { - return results, err - } - continue - } - - results = append(results, FileSummary{ - RelativePath: filepath.ToSlash(rel), - SizeBytes: info.Size(), - SizeHuman: FormatBytes(info.Size()), - ModTime: info.ModTime(), - }) - if len(results) >= limit { - break - } - } - } - - return results, nil -} - func (c *Collector) describeDiskUsage(ctx context.Context, path string, ioTimeout time.Duration) (string, error) { stat, err := safefs.Statfs(ctx, path, ioTimeout) if err != nil { diff --git a/internal/backup/collector_pxar_roots_test.go b/internal/backup/collector_pxar_roots_test.go deleted file mode 100644 index a03348d..0000000 --- a/internal/backup/collector_pxar_roots_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package backup - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/tis24dev/proxsave/internal/types" -) - -func TestComputePxarWorkerRootsCachesResults(t *testing.T) { - root := t.TempDir() - for _, p := range []string{"a/one", "b", "c/d"} { - if err := os.MkdirAll(filepath.Join(root, p), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", p, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 2 - cfg.PxarScanMaxRoots = 2 - c := NewCollector(newTestLogger(), cfg, root, types.ProxmoxBS, false) - - ctx := context.Background() - first, err := c.computePxarWorkerRoots(ctx, root, "test") - if err != nil { - t.Fatalf("computePxarWorkerRoots error: %v", err) - } - if len(first) == 0 || len(first) > 2 { - t.Fatalf("unexpected roots count: %d", len(first)) - } - - // Remove the directory to ensure cached results are used. 
- os.RemoveAll(root) - second, err := c.computePxarWorkerRoots(ctx, root, "test") - if err != nil { - t.Fatalf("computePxarWorkerRoots (cached) error: %v", err) - } - if len(second) != len(first) { - t.Fatalf("cached results length mismatch: %d vs %d", len(second), len(first)) - } -} diff --git a/internal/backup/collector_pxar_test.go b/internal/backup/collector_pxar_test.go deleted file mode 100644 index 1626c1f..0000000 --- a/internal/backup/collector_pxar_test.go +++ /dev/null @@ -1,729 +0,0 @@ -package backup - -import ( - "context" - "fmt" - "os" - "path/filepath" - "reflect" - "strings" - "testing" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" -) - -type closedDoneContext struct { - context.Context - done chan struct{} - err error -} - -func newClosedDoneContext(err error) *closedDoneContext { - ch := make(chan struct{}) - close(ch) - return &closedDoneContext{ - Context: context.Background(), - done: ch, - err: err, - } -} - -func (c *closedDoneContext) Done() <-chan struct{} { return c.done } -func (c *closedDoneContext) Err() error { return c.err } - -func TestDownsampleRoots(t *testing.T) { - roots := []string{"a", "b", "c", "d"} - if got := downsampleRoots(roots, 0); !reflect.DeepEqual(got, roots) { - t.Fatalf("limit=0 should return original slice") - } - limited := downsampleRoots(roots, 2) - if len(limited) != 2 { - t.Fatalf("expected limited slice len 2, got %d", len(limited)) - } - seen := map[string]bool{} - for _, r := range limited { - if seen[r] { - t.Fatalf("duplicate in downsampled roots: %s", r) - } - seen[r] = true - } -} - -func TestDeterministicShuffleAndSeed(t *testing.T) { - items := []string{"one", "two", "three"} - seed := deterministicSeed("a", "b") - seed2 := deterministicSeed("a", "b", "c") - if seed == seed2 { - t.Fatalf("different seed inputs should differ") - } - - first := append([]string(nil), items...) - second := append([]string(nil), items...) 
- shuffleStringsDeterministic(first, seed) - shuffleStringsDeterministic(second, seed) - if !reflect.DeepEqual(first, second) { - t.Fatalf("shuffle with same seed should be deterministic") - } -} - -func TestPxarRootSelector(t *testing.T) { - sel := newPxarRootSelector(2) - for _, p := range []string{"a", "b", "c", "d"} { - sel.consider(p) - } - results := sel.results() - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - if sel.total != 4 || !sel.capped { - t.Fatalf("selector total=%d capped=%v want total=4 capped=true", sel.total, sel.capped) - } -} - -func TestPxarRootSelectorLimitZeroReturnsAllUnique(t *testing.T) { - sel := newPxarRootSelector(0) - for _, p := range []string{"a", "a", "b"} { - sel.consider(p) - } - results := sel.results() - if len(results) != 2 { - t.Fatalf("expected unique results, got %v", results) - } -} - -func TestPxarRootSelectorSkipsReplacementForHighWeightCandidate(t *testing.T) { - p1, p2 := "a", "b" - if hashPath(p1) > hashPath(p2) { - p1, p2 = p2, p1 - } - - sel := newPxarRootSelector(1) - sel.consider(p1) - sel.consider(p2) // higher weight => should be ignored - - results := sel.results() - if len(results) != 1 || results[0] != p1 { - t.Fatalf("expected selector to keep low-weight %q, got %v", p1, results) - } - if !sel.capped { - t.Fatalf("expected selector capped=true") - } -} - -func TestRecomputeMaxHandlesEmptyItems(t *testing.T) { - sel := newPxarRootSelector(1) - sel.items = nil - sel.recomputeMax() - if sel.maxIdx != -1 || sel.maxWeight != 0 { - t.Fatalf("unexpected recompute state: maxIdx=%d maxWeight=%d", sel.maxIdx, sel.maxWeight) - } -} - -func TestHashPathAndUniquePaths(t *testing.T) { - if hashPath("foo") == hashPath("bar") { - t.Fatalf("expected different hashes for different inputs") - } - paths := []string{"a", "a", "b"} - unique := uniquePaths(paths) - if len(unique) != 2 || unique[0] != "a" || unique[1] != "b" { - t.Fatalf("uniquePaths failed: %#v", unique) - } -} - -func TestUniquePathsEmptyInput(t *testing.T) { - if got := uniquePaths(nil); got != nil { - t.Fatalf("expected nil, got %#v", got) - } - if got := uniquePaths([]string{}); len(got) != 0 { - t.Fatalf("expected empty slice, got %#v", got) - } -} - -func TestDownsampleRootsStepOneReturnsPrefix(t *testing.T) { - roots := []string{"a", "b", "c"} - got := downsampleRoots(roots, 2) - if !reflect.DeepEqual(got, []string{"a", "b"}) { - t.Fatalf("expected prefix, got %#v", got) - } -} - -func TestSampleFilesRespectsPatternsAndLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - mk := func(rel, content string) { - path := filepath.Join(root, rel) - _ = os.MkdirAll(filepath.Dir(path), 0o755) - _ = os.WriteFile(path, []byte(content), 0o640) - } - mk("keep1.txt", "data") - mk("skip.log", "data") - mk(filepath.Join("nested", "keep2.txt"), "data") - - ctx := context.Background() - include := []string{"*.txt"} - exclude := []string{"skip*"} - - results, err := c.sampleFiles(ctx, root, include, exclude, 3, 2) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - if len(results) != 2 { - t.Fatalf("expected 2 results (limit), got %d", len(results)) - } - for _, r := range results { - if filepath.Ext(r.RelativePath) != ".txt" { - t.Fatalf("unexpected file in results: %+v", r) - } - if r.SizeHuman == "" { - t.Fatalf("SizeHuman should be set") - } - } -} - -func TestSampleFilesLimitZeroReturnsEmpty(t *testing.T) { - root 
:= t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 0) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleFilesReadDirErrorPropagates(t *testing.T) { - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - _, err := c.sampleFiles(context.Background(), filepath.Join(t.TempDir(), "missing"), nil, nil, 3, 1) - if err == nil { - t.Fatalf("expected error for missing root") - } -} - -func TestSampleFilesLimitTriggersDuringTopLevelScan(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - for i := 0; i < 5; i++ { - name := fmt.Sprintf("file-%d.txt", i) - if err := os.WriteFile(filepath.Join(root, name), []byte("x"), 0o644); err != nil { - t.Fatalf("write %s: %v", name, err) - } - } - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 1 { - t.Fatalf("expected 1 result due to limit, got %d", len(results)) - } -} - -func TestSampleFilesReturnsWhenNoWorkerRoots(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - if err := os.WriteFile(filepath.Join(root, "top.txt"), []byte("x"), 0o644); err != nil { - t.Fatalf("write top.txt: %v", err) - } - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 1 { - t.Fatalf("expected 1 result, got %d", len(results)) - } -} - -func TestSampleFilesUsesDefaultWorkerLimitWhenZero(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.PxarIntraConcurrency = 0 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.MkdirAll(filepath.Join(root, "nested"), 0o755); err != nil { - t.Fatalf("mkdir nested: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "nested", "n.txt"), []byte("x"), 0o644); err != nil { - t.Fatalf("write n.txt: %v", err) - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) == 0 { - t.Fatalf("expected results from nested walk") - } -} - -func TestSampleFilesSkipsCollectorExcludeAndNonMatchingIncludeAndBrokenSymlinkInfo(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"excluded.txt"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.WriteFile(filepath.Join(root, "keep.txt"), []byte("ok"), 0o644); err != nil { - t.Fatalf("write keep.txt: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "excluded.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write excluded.txt: %v", err) - } - if 
err := os.WriteFile(filepath.Join(root, "skip.log"), []byte("log"), 0o644); err != nil { - t.Fatalf("write skip.log: %v", err) - } - if err := os.Symlink("missing-target", filepath.Join(root, "broken")); err != nil { - t.Fatalf("symlink broken: %v", err) - } - - results, err := c.sampleFiles(context.Background(), root, []string{"*.txt"}, nil, 3, 10) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - if len(results) != 1 || results[0].RelativePath != "keep.txt" { - t.Fatalf("expected only keep.txt, got %#v", results) - } -} - -func TestSampleFilesSkipsExcludedDirsAndRespectsMaxDepthInWorkerWalk(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.MkdirAll(filepath.Join(root, "skip", "inner"), 0o755); err != nil { - t.Fatalf("mkdir skip/inner: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "skip", "inner", "skip.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write skip file: %v", err) - } - - if err := os.MkdirAll(filepath.Join(root, "deep", "inner"), 0o755); err != nil { - t.Fatalf("mkdir deep/inner: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "deep", "ok.txt"), []byte("ok"), 0o644); err != nil { - t.Fatalf("write deep/ok.txt: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "deep", "inner", "too-deep.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write deep/inner/too-deep.txt: %v", err) - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 1, 10) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - - paths := make([]string, 0, len(results)) - for _, r := range results { - paths = append(paths, r.RelativePath) - } - if !reflect.DeepEqual(paths, []string{"deep/ok.txt"}) { - t.Fatalf("expected only deep/ok.txt due to exclusions and maxDepth, got %v", paths) - } -} - -func TestSampleDirectoriesDepthAndLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - makeDir := func(rel string) { - _ = os.MkdirAll(filepath.Join(root, rel), 0o755) - } - makeDir("a/b") - makeDir("c") - makeDir("d/e/f") - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - ctx := context.Background() - dirs, err := c.sampleDirectories(ctx, root, 1, 2) - if err != nil { - t.Fatalf("sampleDirectories error: %v", err) - } - if len(dirs) != 2 { - t.Fatalf("expected limit 2, got %d", len(dirs)) - } - for _, d := range dirs { - if strings.Count(d, "/") > 0 { - t.Fatalf("expected depth < 1, got %s", d) - } - } -} - -func TestComputePxarWorkerRootsFallbackToIntermediateLevelAndDownsamples(t *testing.T) { - root := t.TempDir() - for _, p := range []string{ - "a/a1", - "a/a2", - "b/b1", - "c/c1", - "d/d1", - } { - if err := os.MkdirAll(filepath.Join(root, p), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", p, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 3 - cfg.PxarScanMaxRoots = 2 - cfg.PxarEnumWorkers = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - ctx 
:= context.Background() - roots, err := c.computePxarWorkerRoots(ctx, root, "fallback-test") - if err != nil { - t.Fatalf("computePxarWorkerRoots error: %v", err) - } - if len(roots) != 2 { - t.Fatalf("expected downsampled roots len 2, got %d (%v)", len(roots), roots) - } - for _, r := range roots { - if _, err := os.Stat(r); err != nil { - t.Fatalf("expected root to exist (%s): %v", r, err) - } - rel, err := filepath.Rel(root, r) - if err != nil { - t.Fatalf("rel error: %v", err) - } - if strings.Count(rel, string(filepath.Separator)) != 1 { - t.Fatalf("expected fallback roots at depth 2, got %s (rel=%s)", r, rel) - } - } -} - -func TestSampleDirectoriesLimitZeroReturnsEmpty(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - results, err := c.sampleDirectories(context.Background(), root, 2, 0) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleDirectoriesReturnsWhenNoWorkerRoots(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - results, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleDirectoriesStopsAtLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - for _, d := range []string{"a", "b", "c"} { - if err := os.MkdirAll(filepath.Join(root, d, "x"), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 3, 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(dirs) != 1 { - t.Fatalf("expected 1 result due to limit, got %d", len(dirs)) - } -} - -func TestSampleDirectoriesReturnsErrorWhenWorkerStartDirMissing(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{filepath.Join(root, "missing")} - - _, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err == nil { - t.Fatalf("expected error when startDir is missing") - } -} - -func TestSampleDirectoriesUsesDefaultWorkerLimitAndSkipsExcludedDirs(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.PxarIntraConcurrency = 0 - // shouldExclude() tests patterns against multiple "candidates" including the basename, - // so using "skip" reliably excludes the directory itself and thus its subtree via SkipDir. 
- cfg.ExcludePatterns = []string{"skip"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) - - for _, d := range []string{"keep/inner", "skip/inner"} { - if err := os.MkdirAll(filepath.Join(root, d), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - for _, d := range dirs { - if strings.HasPrefix(d, "skip") { - t.Fatalf("expected excluded directories to be skipped, got %v", dirs) - } - } -} - -func TestSampleDirectoriesReturnsNilOnCanceledContextWithoutStartingWorkers(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - dirs, err := c.sampleDirectories(ctx, root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(dirs) != 0 { - t.Fatalf("expected empty result, got %v", dirs) - } -} - -func TestSampleDirectoriesReturnsContextErrorWhenNotCanceled(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - errBoom := fmt.Errorf("boom") - _, err := c.sampleDirectories(newClosedDoneContext(errBoom), root, 2, 10) - if err == nil || err.Error() != errBoom.Error() { - t.Fatalf("expected %v, got %v", errBoom, err) - } -} - -func TestSampleDirectoriesSkipsExcludedFiles(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip.txt"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "skip.txt"), []byte("nope"), 0o644); err != nil { - t.Fatalf("write skip.txt: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - foundKeep := false - for _, d := range dirs { - if d == "keep" { - foundKeep = true - } - } - if !foundKeep { - t.Fatalf("expected keep in results, got %v", dirs) - } -} - -func TestComputePxarWorkerRootsNormalizesDefaults(t *testing.T) { - root := t.TempDir() - if err := os.MkdirAll(filepath.Join(root, "a"), 0o755); err != nil { - t.Fatalf("mkdir a: %v", err) - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 0 - 
cfg.PxarScanMaxRoots = 0 - cfg.PxarEnumWorkers = 0 - cfg.PxarEnumBudgetMs = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "defaults") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) == 0 { - t.Fatalf("expected some roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsReturnsNilWhenNoDirsFound(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "empty") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsCapsAndSkipsExcludedChildren(t *testing.T) { - root := t.TempDir() - for _, d := range []string{"keep1", "keep2", "skip"} { - if err := os.MkdirAll(filepath.Join(root, d), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip"} - cfg.PxarScanFanoutLevel = 1 - cfg.PxarScanMaxRoots = 1 - cfg.PxarEnumWorkers = 1 - cfg.PxarStopOnCap = false - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "cap-test") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) != 1 { - t.Fatalf("expected 1 root due to cap, got %v", roots) - } - for _, r := range roots { - if strings.Contains(r, string(filepath.Separator)+"skip") || filepath.Base(r) == "skip" { - t.Fatalf("expected excluded dir not to be returned, got %v", roots) - } - } -} - -func TestComputePxarWorkerRootsBudgetExceededReturnsNil(t *testing.T) { - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 2 - cfg.PxarEnumBudgetMs = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(newClosedDoneContext(context.DeadlineExceeded), t.TempDir(), "budget-test") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsDebugProgressStopsOnChannelClose(t *testing.T) { - root := t.TempDir() - if err := os.MkdirAll(filepath.Join(root, "a"), 0o755); err != nil { - t.Fatalf("mkdir a: %v", err) - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 1 - cfg.PxarScanMaxRoots = 1 - cfg.PxarEnumWorkers = 1 - - logger := logging.New(types.LogLevelDebug, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "debug-progress") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) == 0 { - t.Fatalf("expected some roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsDebugProgressStopsOnCtxDone(t *testing.T) { - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 1 - - logger := logging.New(types.LogLevelDebug, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(newClosedDoneContext(context.DeadlineExceeded), t.TempDir(), "debug-ctxdone") - if err != nil { - 
t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestSampleFilesReturnsErrorWhenWorkerStartDirMissing(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{filepath.Join(root, "missing")} - - _, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err == nil { - t.Fatalf("expected error when startDir is missing") - } -} diff --git a/internal/backup/fs_sampling_bounded.go b/internal/backup/fs_sampling_bounded.go new file mode 100644 index 0000000..7755115 --- /dev/null +++ b/internal/backup/fs_sampling_bounded.go @@ -0,0 +1,148 @@ +package backup + +import ( + "context" + "errors" + "path/filepath" + "strings" + "time" + + "github.com/tis24dev/proxsave/internal/safefs" +) + +func (c *Collector) sampleDirectoriesBounded(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { + results := make([]string, 0, limit) + if limit <= 0 || maxDepth <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + if !entry.IsDir() { + continue + } + child := filepath.Join(dirPath, entry.Name()) + if c.shouldExclude(child) { + continue + } + + rel, relErr := filepath.Rel(root, child) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + + results = append(results, rel) + if len(results) >= limit { + break + } + if depth < maxDepth-1 { + stack = append(stack, child) + } + } + } + + return results, nil +} + +func (c *Collector) sampleFilesBounded(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int, ioTimeout time.Duration) ([]FileSummary, error) { + results := make([]FileSummary, 0, limit) + if limit <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + + name := entry.Name() + full := filepath.Join(dirPath, name) + if c.shouldExclude(full) { + continue + } + + if entry.IsDir() { + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + stack = append(stack, full) + continue + } + + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." 
|| strings.HasPrefix(rel, "..") { + continue + } + + if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, name, rel) { + continue + } + if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, name, rel) { + continue + } + + info, err := safefs.Stat(ctx, full, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return results, err + } + continue + } + + results = append(results, FileSummary{ + RelativePath: filepath.ToSlash(rel), + SizeBytes: info.Size(), + SizeHuman: FormatBytes(info.Size()), + ModTime: info.ModTime(), + }) + if len(results) >= limit { + break + } + } + } + + return results, nil +} diff --git a/internal/backup/fs_sampling_bounded_test.go b/internal/backup/fs_sampling_bounded_test.go new file mode 100644 index 0000000..096f0a4 --- /dev/null +++ b/internal/backup/fs_sampling_bounded_test.go @@ -0,0 +1,137 @@ +package backup + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/tis24dev/proxsave/internal/types" +) + +func TestSampleDirectoriesBoundedRespectsDepthAndLimit(t *testing.T) { + root := t.TempDir() + for _, rel := range []string{ + filepath.Join("a", "b"), + "c", + filepath.Join("d", "e", "f"), + } { + if err := os.MkdirAll(filepath.Join(root, rel), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", rel, err) + } + } + + c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) + + dirs, err := c.sampleDirectoriesBounded(context.Background(), root, 1, 10, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + if len(dirs) != 3 { + t.Fatalf("expected 3 top-level dirs, got %v", dirs) + } + for _, d := range dirs { + if strings.Contains(d, "/") { + t.Fatalf("expected top-level dir, got %q", d) + } + } + + dirs, err = c.sampleDirectoriesBounded(context.Background(), root, 2, 20, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + want := map[string]bool{ + "a": true, + "a/b": true, + "c": true, + "d": true, + "d/e": true, + } + for _, got := range dirs { + delete(want, got) + if got == "d/e/f" { + t.Fatalf("unexpected deep dir %q in results: %v", got, dirs) + } + } + if len(want) != 0 { + t.Fatalf("missing expected directories: %#v (got %v)", want, dirs) + } + + limited, err := c.sampleDirectoriesBounded(context.Background(), root, 1, 2, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + if len(limited) != 2 { + t.Fatalf("expected limit=2 results, got %v", limited) + } +} + +func TestSampleFilesBoundedRespectsPatternsExcludeAndDepth(t *testing.T) { + root := t.TempDir() + write := func(rel, content string) { + path := filepath.Join(root, rel) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", filepath.Dir(rel), err) + } + if err := os.WriteFile(path, []byte(content), 0o640); err != nil { + t.Fatalf("write %s: %v", rel, err) + } + } + + write("keep1.txt", "data") + write("excluded.txt", "data") + write("skip_me.txt", "data") + write(filepath.Join("nested", "keep2.txt"), "data") + write(filepath.Join("nested", "deep", "keep3.txt"), "data") + + cfg := GetDefaultCollectorConfig() + cfg.ExcludePatterns = []string{"excluded.txt"} + c := NewCollector(newTestLogger(), cfg, t.TempDir(), types.ProxmoxBS, false) + + include := []string{"*.txt"} + exclude := []string{"skip*"} + results, err := c.sampleFilesBounded(context.Background(), root, include, exclude, 1, 50, 0) + if err != nil { + t.Fatalf("sampleFilesBounded error: %v", err) + } + + got 
:= map[string]FileSummary{} + for _, r := range results { + got[r.RelativePath] = r + if strings.Contains(r.RelativePath, `\`) { + t.Fatalf("expected forward-slash relative path, got %q", r.RelativePath) + } + if r.SizeHuman == "" || r.SizeBytes <= 0 { + t.Fatalf("expected populated size fields, got %+v", r) + } + } + + if _, ok := got["keep1.txt"]; !ok { + t.Fatalf("expected keep1.txt in results: %v", results) + } + if _, ok := got["nested/keep2.txt"]; !ok { + t.Fatalf("expected nested/keep2.txt in results: %v", results) + } + if _, ok := got["excluded.txt"]; ok { + t.Fatalf("expected excluded.txt to be skipped: %v", results) + } + if _, ok := got["skip_me.txt"]; ok { + t.Fatalf("expected skip_me.txt to be excluded by pattern: %v", results) + } + if _, ok := got["nested/deep/keep3.txt"]; ok { + t.Fatalf("expected nested/deep/keep3.txt to be skipped due to maxDepth: %v", results) + } +} + +func TestSampleFilesBoundedLimitZeroReturnsEmpty(t *testing.T) { + root := t.TempDir() + c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) + results, err := c.sampleFilesBounded(context.Background(), root, nil, nil, 2, 0, 0) + if err != nil { + t.Fatalf("sampleFilesBounded error: %v", err) + } + if len(results) != 0 { + t.Fatalf("expected empty results, got %v", results) + } +} diff --git a/internal/backup/optimizations_structured_test.go b/internal/backup/optimizations_structured_test.go index 1a46a16..5d20129 100644 --- a/internal/backup/optimizations_structured_test.go +++ b/internal/backup/optimizations_structured_test.go @@ -13,26 +13,26 @@ import ( func TestPrefilterSkipsStructuredConfigs(t *testing.T) { tmp := t.TempDir() - + // Create structured config (should be skipped) pbsDir := filepath.Join(tmp, "etc", "proxmox-backup") if err := os.MkdirAll(pbsDir, 0o755); err != nil { t.Fatalf("mkdir: %v", err) } - + pbsCfg := filepath.Join(pbsDir, "datastore.cfg") pbsContent := "datastore: Test\n\tpath /mnt/test\n\tcomment Test DS\n" if err := os.WriteFile(pbsCfg, []byte(pbsContent), 0o640); err != nil { t.Fatalf("write pbs config: %v", err) } - + // Create normal config with CRLF (should be normalized) normalCfg := filepath.Join(tmp, "etc", "normal.cfg") normalContent := "option1\r\noption2\r\n" if err := os.WriteFile(normalCfg, []byte(normalContent), 0o640); err != nil { t.Fatalf("write normal config: %v", err) } - + // Create log file with CRLF (should be normalized) logDir := filepath.Join(tmp, "var", "log") if err := os.MkdirAll(logDir, 0o755); err != nil { @@ -43,13 +43,13 @@ func TestPrefilterSkipsStructuredConfigs(t *testing.T) { if err := os.WriteFile(logFile, []byte(logContent), 0o640); err != nil { t.Fatalf("write log: %v", err) } - + // Run prefilter logger := logging.New(types.LogLevelError, false) if err := prefilterFiles(context.Background(), logger, tmp, 8*1024*1024); err != nil { t.Fatalf("prefilterFiles: %v", err) } - + // Verify PBS config unchanged (TABs preserved) pbsAfter, _ := os.ReadFile(pbsCfg) if string(pbsAfter) != pbsContent { @@ -58,7 +58,7 @@ func TestPrefilterSkipsStructuredConfigs(t *testing.T) { if !strings.Contains(string(pbsAfter), "\t") { t.Fatalf("PBS config lost TAB indentation") } - + // Verify normal config normalized (CRLF removed) normalAfter, _ := os.ReadFile(normalCfg) if strings.Contains(string(normalAfter), "\r") { @@ -68,7 +68,7 @@ func TestPrefilterSkipsStructuredConfigs(t *testing.T) { if string(normalAfter) != expectedNormal { t.Fatalf("Normal config not normalized correctly\nExpected: %q\nGot: %q", 
expectedNormal, string(normalAfter)) } - + // Verify log normalized (CRLF removed) logAfter, _ := os.ReadFile(logFile) if strings.Contains(string(logAfter), "\r") { diff --git a/internal/config/config.go b/internal/config/config.go index 0fa10a5..1bea4f2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -219,12 +219,6 @@ type Config struct { BackupPruneSchedules bool BackupPxarFiles bool PxarDatastoreConcurrency int - PxarIntraConcurrency int - PxarScanFanoutLevel int - PxarScanMaxRoots int - PxarStopOnCap bool - PxarEnumWorkers int - PxarEnumBudgetMs int PxarFileIncludePatterns []string PxarFileExcludePatterns []string @@ -704,12 +698,6 @@ func (c *Config) parsePBSSettings() { c.BackupPruneSchedules = c.getBool("BACKUP_PRUNE_SCHEDULES", true) c.BackupPxarFiles = c.getBoolWithFallback([]string{"PXAR_SCAN_ENABLE", "BACKUP_PXAR_FILES"}, true) c.PxarDatastoreConcurrency = c.getInt("PXAR_SCAN_DS_CONCURRENCY", 3) - c.PxarIntraConcurrency = c.getInt("PXAR_SCAN_INTRA_CONCURRENCY", 4) - c.PxarScanFanoutLevel = c.getInt("PXAR_SCAN_FANOUT_LEVEL", 2) - c.PxarScanMaxRoots = c.getInt("PXAR_SCAN_MAX_ROOTS", 2048) - c.PxarStopOnCap = c.getBool("PXAR_STOP_ON_CAP", false) - c.PxarEnumWorkers = c.getInt("PXAR_ENUM_READDIR_WORKERS", 4) - c.PxarEnumBudgetMs = c.getInt("PXAR_ENUM_BUDGET_MS", 0) c.PxarFileIncludePatterns = normalizeList(c.getStringSliceWithFallback([]string{"PXAR_FILE_INCLUDE_PATTERN", "PXAR_INCLUDE_PATTERN"}, nil)) c.PxarFileExcludePatterns = normalizeList(c.getStringSlice("PXAR_FILE_EXCLUDE_PATTERN", nil)) } diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 4ec1c09..ee03cfb 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -308,14 +308,8 @@ BACKUP_TAPE_CONFIGS=true BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP_NETWORK_CONFIGS (system) BACKUP_PRUNE_SCHEDULES=true PXAR_SCAN_ENABLE=false -PXAR_SCAN_DS_CONCURRENCY=3 # Number of datastores scanned in parallel for PXAR metadata -PXAR_SCAN_INTRA_CONCURRENCY=4 # Worker threads per datastore for PXAR directory/file sampling -PXAR_SCAN_FANOUT_LEVEL=2 # Directory depth for worker fan-out (1=top-level, 2=vm/ct IDs, increase for namespaces) -PXAR_SCAN_MAX_ROOTS=2048 # Maximum worker roots per datastore (limits fan-out enumeration) -PXAR_STOP_ON_CAP=false # Stop enumeration immediately after hitting PXAR_SCAN_MAX_ROOTS -PXAR_ENUM_READDIR_WORKERS=4 # Parallel ReadDir workers per fanout depth -PXAR_ENUM_BUDGET_MS=0 # Optional time budget for enumeration (0=disabled) -PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns to locate PXAR files (default auto *.pxar,*.pxar.*) +PXAR_SCAN_DS_CONCURRENCY=3 # Datastores scanned in parallel for PXAR metadata +PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns to locate PXAR files (default: *.pxar,*.pxar.*,catalog.pxar*) PXAR_FILE_EXCLUDE_PATTERN= # Patterns to exclude while sampling files (e.g. 
*.tmp, *.lock) # Override collection paths (use only if directories differ from defaults) diff --git a/internal/identity/identity_test.go b/internal/identity/identity_test.go index f904228..d7a6354 100644 --- a/internal/identity/identity_test.go +++ b/internal/identity/identity_test.go @@ -844,10 +844,10 @@ func TestAddrAssignRank(t *testing.T) { value int want int }{ - {0, 0}, // permanent - best - {3, 1}, // set by userspace - {2, 2}, // stolen - {1, 3}, // random + {0, 0}, // permanent - best + {3, 1}, // set by userspace + {2, 2}, // stolen + {1, 3}, // random {-1, 4}, // unknown {99, 4}, // unknown } @@ -1324,10 +1324,10 @@ func TestSelectPreferredMACEmpty(t *testing.T) { func TestSelectPreferredMACWithEmptyFields(t *testing.T) { candidates := []macCandidate{ - {Iface: "", MAC: "aa:bb:cc:dd:ee:ff"}, // empty iface - {Iface: "eth0", MAC: ""}, // empty mac - {Iface: " ", MAC: " "}, // whitespace only - {Iface: "eth1", MAC: "00:11:22:33:44:55"}, // valid + {Iface: "", MAC: "aa:bb:cc:dd:ee:ff"}, // empty iface + {Iface: "eth0", MAC: ""}, // empty mac + {Iface: " ", MAC: " "}, // whitespace only + {Iface: "eth1", MAC: "00:11:22:33:44:55"}, // valid } mac, iface := selectPreferredMAC(candidates) diff --git a/internal/orchestrator/artifact_guard_test.go b/internal/orchestrator/artifact_guard_test.go deleted file mode 100644 index cd1b36c..0000000 --- a/internal/orchestrator/artifact_guard_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package orchestrator - -import ( - "fmt" - "os" - "path/filepath" - "testing" -) - -// TestMain guards against tests accidentally creating artifacts in the package -// directory (e.g. due to naive fake binaries interpreting flags as paths). -func TestMain(m *testing.M) { - wd, err := os.Getwd() - if err != nil { - fmt.Fprintln(os.Stderr, "TestMain: failed to get wd:", err) - os.Exit(1) - } - - artifact := filepath.Join(wd, "--progress") - - // Clean up a stale artifact from a previous run (best-effort). 
- _ = os.Remove(artifact) - - code := m.Run() - - if _, err := os.Stat(artifact); err == nil { - fmt.Fprintln(os.Stderr, "ERROR: test left artifact:", artifact) - _ = os.Remove(artifact) - code = 1 - } - - os.Exit(code) -} diff --git a/internal/orchestrator/backup_safety_test.go b/internal/orchestrator/backup_safety_test.go index 80c8cf5..2529e2e 100644 --- a/internal/orchestrator/backup_safety_test.go +++ b/internal/orchestrator/backup_safety_test.go @@ -1938,10 +1938,10 @@ func TestRestoreSafetyBackup_ComplexStructure(t *testing.T) { // Add files in various directories files := map[string]string{ - "etc/config.conf": "config content", - "etc/app/app.conf": "app config", - "var/lib/app/data": "app data", - "root.txt": "root file", + "etc/config.conf": "config content", + "etc/app/app.conf": "app config", + "var/lib/app/data": "app data", + "root.txt": "root file", } for name, content := range files { hdr := &tar.Header{Name: name, Mode: 0644, Size: int64(len(content))} diff --git a/internal/orchestrator/chunking_paths.go b/internal/orchestrator/chunking_paths.go deleted file mode 100644 index ef54811..0000000 --- a/internal/orchestrator/chunking_paths.go +++ /dev/null @@ -1,28 +0,0 @@ -package orchestrator - -import "strings" - -func originalPathFromChunk(relPath string) (string, bool) { - if !strings.HasSuffix(relPath, ".chunk") { - return "", false - } - withoutSuffix := strings.TrimSuffix(relPath, ".chunk") - dot := strings.LastIndexByte(withoutSuffix, '.') - if dot < 0 { - return "", false - } - idx := withoutSuffix[dot+1:] - if idx == "" { - return "", false - } - for i := 0; i < len(idx); i++ { - if idx[i] < '0' || idx[i] > '9' { - return "", false - } - } - original := withoutSuffix[:dot] - if original == "" { - return "", false - } - return original, true -} diff --git a/internal/orchestrator/decrypt_tui_simulation_test.go b/internal/orchestrator/decrypt_tui_simulation_test.go index d36f36f..9a65f1c 100644 --- a/internal/orchestrator/decrypt_tui_simulation_test.go +++ b/internal/orchestrator/decrypt_tui_simulation_test.go @@ -34,4 +34,3 @@ func TestPromptDecryptIdentity_PassphraseReturnsIdentity(t *testing.T) { t.Fatalf("expected at least one identity") } } - diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 1fa56ae..c629435 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1541,22 +1541,6 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { if cfg.PxarDatastoreConcurrency > 0 { cc.PxarDatastoreConcurrency = cfg.PxarDatastoreConcurrency } - if cfg.PxarIntraConcurrency > 0 { - cc.PxarIntraConcurrency = cfg.PxarIntraConcurrency - } - if cfg.PxarScanFanoutLevel > 0 { - cc.PxarScanFanoutLevel = cfg.PxarScanFanoutLevel - } - if cfg.PxarScanMaxRoots > 0 { - cc.PxarScanMaxRoots = cfg.PxarScanMaxRoots - } - cc.PxarStopOnCap = cfg.PxarStopOnCap - if cfg.PxarEnumWorkers > 0 { - cc.PxarEnumWorkers = cfg.PxarEnumWorkers - } - if cfg.PxarEnumBudgetMs >= 0 { - cc.PxarEnumBudgetMs = cfg.PxarEnumBudgetMs - } cc.PxarFileIncludePatterns = append([]string(nil), cfg.PxarFileIncludePatterns...) cc.PxarFileExcludePatterns = append([]string(nil), cfg.PxarFileExcludePatterns...) 
diff --git a/internal/orchestrator/orchestrator_test.go b/internal/orchestrator/orchestrator_test.go index 2b5be72..b4400e1 100644 --- a/internal/orchestrator/orchestrator_test.go +++ b/internal/orchestrator/orchestrator_test.go @@ -609,12 +609,6 @@ func TestApplyCollectorOverridesCopiesConfig(t *testing.T) { BaseDir: "/opt/proxsave", PxarDatastoreConcurrency: 3, - PxarIntraConcurrency: 4, - PxarScanFanoutLevel: 2, - PxarScanMaxRoots: 512, - PxarStopOnCap: true, - PxarEnumWorkers: 5, - PxarEnumBudgetMs: 100, PxarFileIncludePatterns: []string{"*.conf"}, PxarFileExcludePatterns: []string{"*.tmp"}, @@ -653,15 +647,8 @@ func TestApplyCollectorOverridesCopiesConfig(t *testing.T) { if cc.ScriptRepositoryPath != cfg.BaseDir { t.Fatalf("ScriptRepositoryPath = %s, want %s", cc.ScriptRepositoryPath, cfg.BaseDir) } - if cc.PxarDatastoreConcurrency != cfg.PxarDatastoreConcurrency || - cc.PxarIntraConcurrency != cfg.PxarIntraConcurrency || - cc.PxarScanFanoutLevel != cfg.PxarScanFanoutLevel || - cc.PxarScanMaxRoots != cfg.PxarScanMaxRoots || - cc.PxarEnumWorkers != cfg.PxarEnumWorkers { - t.Fatalf("Pxar concurrency fields not copied correctly") - } - if !cc.PxarStopOnCap || cc.PxarEnumBudgetMs != cfg.PxarEnumBudgetMs { - t.Fatalf("PxarStopOnCap or PxarEnumBudgetMs not copied") + if cc.PxarDatastoreConcurrency != cfg.PxarDatastoreConcurrency { + t.Fatalf("PxarDatastoreConcurrency not copied correctly") } if len(cc.PxarFileIncludePatterns) != 1 || cc.PxarFileIncludePatterns[0] != "*.conf" { t.Fatalf("PxarFileIncludePatterns not copied as expected: %#v", cc.PxarFileIncludePatterns) diff --git a/internal/orchestrator/pbs_notifications_api_apply_test.go b/internal/orchestrator/pbs_notifications_api_apply_test.go index 51a0ca7..43dac96 100644 --- a/internal/orchestrator/pbs_notifications_api_apply_test.go +++ b/internal/orchestrator/pbs_notifications_api_apply_test.go @@ -60,4 +60,3 @@ func TestApplyPBSNotificationsViaAPI_CreatesEndpointAndMatcher(t *testing.T) { t.Fatalf("calls=%v want %v", runner.calls, want) } } - diff --git a/internal/orchestrator/pbs_service_restart_order_test.go b/internal/orchestrator/pbs_service_restart_order_test.go deleted file mode 100644 index f6999bf..0000000 --- a/internal/orchestrator/pbs_service_restart_order_test.go +++ /dev/null @@ -1,75 +0,0 @@ -package orchestrator - -import ( - "context" - "errors" - "os" - "slices" - "testing" -) - -func TestPBSServicesNotRestartedDuringFileBasedStagedApply(t *testing.T) { - if os.Geteuid() != 0 { - t.Skip("requires root to exercise staged apply code paths") - } - - origFS := restoreFS - origCmd := restoreCmd - t.Cleanup(func() { - restoreFS = origFS - restoreCmd = origCmd - }) - - restoreFS = osFS{} - fakeCmd := &FakeCommandRunner{} - restoreCmd = fakeCmd - - stageRoot := t.TempDir() - logger := newTestLogger() - plan := &RestorePlan{ - SystemType: SystemTypePBS, - StagedCategories: []Category{ - {ID: "pbs_host", Type: CategoryTypePBS}, - {ID: "pbs_notifications", Type: CategoryTypePBS}, - }, - PBSRestoreBehavior: PBSRestoreBehaviorMerge, - } - - if err := maybeApplyPBSConfigsFromStage(context.Background(), logger, plan, stageRoot, false); err != nil { - t.Fatalf("maybeApplyPBSConfigsFromStage error: %v", err) - } - if len(fakeCmd.Calls) != 0 { - t.Fatalf("expected no commands during file-based staged apply, got %v", fakeCmd.Calls) - } - - if err := maybeApplyNotificationsFromStage(context.Background(), logger, plan, stageRoot, false); err != nil { - t.Fatalf("maybeApplyNotificationsFromStage error: %v", err) - } - if 
len(fakeCmd.Calls) != 0 { - t.Fatalf("expected no commands during notifications staged apply on PBS, got %v", fakeCmd.Calls) - } - - // Allow the temporary stop at the end of API apply to complete quickly. - if fakeCmd.Outputs == nil { - fakeCmd.Outputs = make(map[string][]byte) - } - if fakeCmd.Errors == nil { - fakeCmd.Errors = make(map[string]error) - } - for _, svc := range []string{"proxmox-backup-proxy", "proxmox-backup"} { - key := "systemctl is-active " + svc - fakeCmd.Outputs[key] = []byte("inactive\n") - fakeCmd.Errors[key] = errors.New("exit status 3") - } - - if err := maybeApplyPBSConfigsViaAPIFromStage(context.Background(), logger, plan, stageRoot, false, true); err != nil { - t.Fatalf("maybeApplyPBSConfigsViaAPIFromStage error: %v", err) - } - - if !slices.Contains(fakeCmd.Calls, "systemctl start proxmox-backup") { - t.Fatalf("expected PBS service start during API phase, calls=%v", fakeCmd.Calls) - } - if !slices.Contains(fakeCmd.Calls, "systemctl stop --no-block proxmox-backup-proxy") { - t.Fatalf("expected PBS service stop after API phase, calls=%v", fakeCmd.Calls) - } -} diff --git a/internal/orchestrator/pve_safe_apply_mappings.go b/internal/orchestrator/pve_safe_apply_mappings.go index e907a6f..dea21ea 100644 --- a/internal/orchestrator/pve_safe_apply_mappings.go +++ b/internal/orchestrator/pve_safe_apply_mappings.go @@ -315,4 +315,3 @@ func renderMappingEntry(entry map[string]string) string { } return strings.Join(parts, ",") } - diff --git a/internal/orchestrator/pve_staged_apply_test.go b/internal/orchestrator/pve_staged_apply_test.go index 6015116..a5561be 100644 --- a/internal/orchestrator/pve_staged_apply_test.go +++ b/internal/orchestrator/pve_staged_apply_test.go @@ -19,7 +19,7 @@ func TestPVEStorageMountGuardItems_BuildsExpectedTargets(t *testing.T) { fstabMounts := map[string]struct{}{ "/mnt/datastore": {}, "/mnt/Synology_NFS": {}, - "/": {}, + "/": {}, } items := pveStorageMountGuardItems(candidates, mountCandidates, fstabMounts) diff --git a/internal/orchestrator/restore_access_control_ui.go b/internal/orchestrator/restore_access_control_ui.go index 2fecd71..7dee769 100644 --- a/internal/orchestrator/restore_access_control_ui.go +++ b/internal/orchestrator/restore_access_control_ui.go @@ -209,11 +209,11 @@ func maybeApplyPVEAccessControlFromClusterBackupWithUI( logger.Info("") message := fmt.Sprintf( - "Cluster backup detected.\n\n"+ - "Applying PVE access control will modify users/roles/groups/ACLs and secrets cluster-wide.\n\n"+ - "WARNING: This may lock you out or break API tokens/automation.\n\n"+ - "Safety rail: root@pam is preserved from the current system and kept Administrator on /.\n\n"+ - "Recommendation: do this from local console/IPMI, not over SSH.\n\n"+ + "Cluster backup detected.\n\n" + + "Applying PVE access control will modify users/roles/groups/ACLs and secrets cluster-wide.\n\n" + + "WARNING: This may lock you out or break API tokens/automation.\n\n" + + "Safety rail: root@pam is preserved from the current system and kept Administrator on /.\n\n" + + "Recommendation: do this from local console/IPMI, not over SSH.\n\n" + "Apply 1:1 PVE access control now?", ) applyNow, err := ui.ConfirmAction(ctx, "Apply PVE access control (cluster-wide)", message, "Apply 1:1 (expert)", "Skip apply", 90*time.Second, false) @@ -488,4 +488,3 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st ) return strings.Join(lines, "\n") + "\n" } - diff --git a/internal/orchestrator/restore_chunking_selective_test.go 
b/internal/orchestrator/restore_chunking_selective_test.go deleted file mode 100644 index 1521190..0000000 --- a/internal/orchestrator/restore_chunking_selective_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package orchestrator - -import ( - "archive/tar" - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "os" - "path/filepath" - "testing" - "time" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" -) - -func TestExtractArchiveNative_SelectiveRestoreReassemblesChunkedFiles(t *testing.T) { - logger := logging.New(types.LogLevelError, false) - - origFS := restoreFS - restoreFS = osFS{} - t.Cleanup(func() { restoreFS = origFS }) - - tmp := t.TempDir() - archivePath := filepath.Join(tmp, "test.tar") - - data := []byte("hello world") - sum := sha256.Sum256(data) - meta := map[string]any{ - "version": 1, - "size_bytes": len(data), - "chunk_size_bytes": 6, - "chunk_count": 2, - "sha256": hex.EncodeToString(sum[:]), - "mode": 0o640, - "uid": -1, - "gid": -1, - "mod_time_unix_nano": time.Now().UnixNano(), - } - metaBytes, _ := json.Marshal(meta) - - f, err := os.Create(archivePath) - if err != nil { - t.Fatalf("create tar: %v", err) - } - tw := tar.NewWriter(f) - addFile := func(name string, payload []byte) { - h := &tar.Header{ - Name: name, - Typeflag: tar.TypeReg, - Mode: 0o640, - Size: int64(len(payload)), - } - if err := tw.WriteHeader(h); err != nil { - t.Fatalf("write header %s: %v", name, err) - } - if len(payload) > 0 { - if _, err := tw.Write(payload); err != nil { - t.Fatalf("write data %s: %v", name, err) - } - } - } - - addFile("var/lib/pve-cluster/config.db-wal.chunked", metaBytes) - addFile("chunked_files/var/lib/pve-cluster/config.db-wal.001.chunk", data[:6]) - addFile("chunked_files/var/lib/pve-cluster/config.db-wal.002.chunk", data[6:]) - addFile("etc/hosts", []byte("127.0.0.1 localhost\n")) - - if err := tw.Close(); err != nil { - t.Fatalf("close tar writer: %v", err) - } - if err := f.Close(); err != nil { - t.Fatalf("close tar file: %v", err) - } - - destRoot := filepath.Join(tmp, "out") - if err := os.MkdirAll(destRoot, 0o755); err != nil { - t.Fatalf("mkdir destRoot: %v", err) - } - - cats := []Category{{ - ID: "pve_cluster_dbwal", - Name: "PVE Cluster DB WAL (test)", - Paths: []string{ - "./var/lib/pve-cluster/config.db-wal", - }, - }} - - if err := extractArchiveNative(context.Background(), archivePath, destRoot, logger, cats, RestoreModeCustom, nil, "", nil); err != nil { - t.Fatalf("extractArchiveNative: %v", err) - } - - originalPath := filepath.Join(destRoot, "var", "lib", "pve-cluster", "config.db-wal") - got, err := os.ReadFile(originalPath) - if err != nil { - t.Fatalf("read reassembled file: %v", err) - } - if string(got) != string(data) { - t.Fatalf("reassembled content mismatch: got %q", string(got)) - } - - if _, err := os.Stat(originalPath + ".chunked"); !os.IsNotExist(err) { - t.Fatalf("marker file should be removed, stat err=%v", err) - } - if _, err := os.Stat(filepath.Join(destRoot, "chunked_files")); !os.IsNotExist(err) { - t.Fatalf("chunked_files dir should be removed, stat err=%v", err) - } - if _, err := os.Stat(filepath.Join(destRoot, "etc", "hosts")); !os.IsNotExist(err) { - t.Fatalf("unrelated file should not be extracted in selective mode, stat err=%v", err) - } -} diff --git a/internal/orchestrator/restore_filesystem.go b/internal/orchestrator/restore_filesystem.go index bb27ce9..9c0132d 100644 --- a/internal/orchestrator/restore_filesystem.go +++ 
b/internal/orchestrator/restore_filesystem.go @@ -111,11 +111,11 @@ type lsblkReport struct { } type lsblkDevice struct { - Name string `json:"name"` - Path string `json:"path"` - UUID string `json:"uuid"` - PartUUID string `json:"partuuid"` - Label string `json:"label"` + Name string `json:"name"` + Path string `json:"path"` + UUID string `json:"uuid"` + PartUUID string `json:"partuuid"` + Label string `json:"label"` Children []lsblkDevice `json:"children"` } diff --git a/internal/orchestrator/restore_ha_test.go b/internal/orchestrator/restore_ha_test.go index 3622968..c0fae98 100644 --- a/internal/orchestrator/restore_ha_test.go +++ b/internal/orchestrator/restore_ha_test.go @@ -84,4 +84,3 @@ func TestApplyPVEHAFromStage_DoesNotPruneWhenStageMissing(t *testing.T) { t.Fatalf("unexpected resources.cfg content: %q", string(data)) } } - diff --git a/internal/orchestrator/restore_sdn_test.go b/internal/orchestrator/restore_sdn_test.go index 3a288d9..ea72096 100644 --- a/internal/orchestrator/restore_sdn_test.go +++ b/internal/orchestrator/restore_sdn_test.go @@ -80,4 +80,3 @@ func TestApplyPVESDNFromStage_NoStageData_NoChanges(t *testing.T) { t.Fatalf("expected no applied paths, got=%v", applied) } } - diff --git a/internal/orchestrator/restore_tui_simulation_test.go b/internal/orchestrator/restore_tui_simulation_test.go index ff1226f..2ec843c 100644 --- a/internal/orchestrator/restore_tui_simulation_test.go +++ b/internal/orchestrator/restore_tui_simulation_test.go @@ -145,4 +145,3 @@ func TestSelectCategoriesTUI_CancelReturnsAborted(t *testing.T) { t.Fatalf("err=%v; want %v", err, ErrRestoreAborted) } } - diff --git a/internal/orchestrator/restore_workflow_abort_test.go b/internal/orchestrator/restore_workflow_abort_test.go index 5a1fed1..9dc5d31 100644 --- a/internal/orchestrator/restore_workflow_abort_test.go +++ b/internal/orchestrator/restore_workflow_abort_test.go @@ -93,10 +93,10 @@ func TestRunRestoreWorkflow_FstabPromptInputAborted_AbortsWorkflow(t *testing.T) logger := logging.New(types.LogLevelError, false) cfg := &config.Config{BaseDir: "/base"} ui := &fakeRestoreWorkflowUI{ - mode: RestoreModeCustom, - categories: []Category{mustCategoryByID(t, "filesystem")}, - confirmRestore: true, - confirmFstabMerge: false, + mode: RestoreModeCustom, + categories: []Category{mustCategoryByID(t, "filesystem")}, + confirmRestore: true, + confirmFstabMerge: false, confirmFstabMergeErr: input.ErrInputAborted, } diff --git a/internal/orchestrator/restore_workflow_ui_tfa_test.go b/internal/orchestrator/restore_workflow_ui_tfa_test.go index 647de55..cc9d298 100644 --- a/internal/orchestrator/restore_workflow_ui_tfa_test.go +++ b/internal/orchestrator/restore_workflow_ui_tfa_test.go @@ -49,4 +49,3 @@ func TestMaybeAddRecommendedCategoriesForTFA_DoesNotAddWhenDeclined(t *testing.T t.Fatalf("expected no categories to be added, got=%v", got) } } - diff --git a/internal/orchestrator/staging_test.go b/internal/orchestrator/staging_test.go deleted file mode 100644 index 7e6610a..0000000 --- a/internal/orchestrator/staging_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package orchestrator - -import ( - "os" - "strings" - "testing" - "time" -) - -func TestCreateRestoreStageDir_Creates0700Directory(t *testing.T) { - origFS := restoreFS - origTime := restoreTime - t.Cleanup(func() { - restoreFS = origFS - restoreTime = origTime - }) - - fake := NewFakeFS() - t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) - restoreFS = fake - restoreTime = &FakeTime{Current: time.Unix(1700000000, 0)} - - stageRoot, err := 
createRestoreStageDir() - if err != nil { - t.Fatalf("createRestoreStageDir error: %v", err) - } - if !strings.Contains(stageRoot, "/tmp/proxsave/restore-stage-") { - t.Fatalf("stageRoot=%q; want under /tmp/proxsave/restore-stage-*", stageRoot) - } - - info, err := fake.Stat(stageRoot) - if err != nil { - t.Fatalf("Stat(%q): %v", stageRoot, err) - } - if info == nil || !info.IsDir() { - t.Fatalf("Stat(%q): isDir=%v; want dir", stageRoot, info != nil && info.IsDir()) - } - if perm := info.Mode().Perm(); perm != 0o700 { - t.Fatalf("stageRoot perm=%#o; want %#o", perm, 0o700) - } -} - -func TestCreateRestoreStageDir_UniqueBetweenCalls(t *testing.T) { - origFS := restoreFS - origTime := restoreTime - t.Cleanup(func() { - restoreFS = origFS - restoreTime = origTime - }) - - fake := NewFakeFS() - t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) - restoreFS = fake - restoreTime = &FakeTime{Current: time.Unix(1700000000, 0)} - - first, err := createRestoreStageDir() - if err != nil { - t.Fatalf("first createRestoreStageDir error: %v", err) - } - second, err := createRestoreStageDir() - if err != nil { - t.Fatalf("second createRestoreStageDir error: %v", err) - } - if first == second { - t.Fatalf("stageRoot collision: %q", first) - } -} - -func TestCleanupOldRestoreStageDirs_RemovesOnlyOldDirs(t *testing.T) { - fake := NewFakeFS() - t.Cleanup(func() { _ = os.RemoveAll(fake.Root) }) - - base := "/tmp/proxsave" - oldDir := base + "/restore-stage-old" - newDir := base + "/restore-stage-new" - - if err := fake.MkdirAll(base, 0o755); err != nil { - t.Fatalf("MkdirAll(%q): %v", base, err) - } - if err := fake.MkdirAll(oldDir, 0o700); err != nil { - t.Fatalf("MkdirAll(%q): %v", oldDir, err) - } - if err := fake.MkdirAll(newDir, 0o700); err != nil { - t.Fatalf("MkdirAll(%q): %v", newDir, err) - } - if err := fake.WriteFile(base+"/restore-stage-file", []byte("x"), 0o600); err != nil { - t.Fatalf("WriteFile restore-stage-file: %v", err) - } - - now := time.Unix(1700000000, 0).UTC() - oldTime := now.Add(-48 * time.Hour) - newTime := now.Add(-1 * time.Hour) - - if err := os.Chtimes(fake.onDisk(oldDir), oldTime, oldTime); err != nil { - t.Fatalf("Chtimes(oldDir): %v", err) - } - if err := os.Chtimes(fake.onDisk(newDir), newTime, newTime); err != nil { - t.Fatalf("Chtimes(newDir): %v", err) - } - - removed, failed := cleanupOldRestoreStageDirs(fake, nil, now, 24*time.Hour) - if failed != 0 { - t.Fatalf("failed=%d; want 0", failed) - } - if removed != 1 { - t.Fatalf("removed=%d; want 1", removed) - } - - if _, err := fake.Stat(oldDir); err == nil || !os.IsNotExist(err) { - t.Fatalf("oldDir still exists (err=%v); want removed", err) - } - if _, err := fake.Stat(newDir); err != nil { - t.Fatalf("newDir missing (err=%v); want kept", err) - } - if _, err := fake.Stat(base + "/restore-stage-file"); err != nil { - t.Fatalf("restore-stage-file missing (err=%v); want kept", err) - } -} diff --git a/internal/orchestrator/tui_hooks.go b/internal/orchestrator/tui_hooks.go index fe2cedb..e347e42 100644 --- a/internal/orchestrator/tui_hooks.go +++ b/internal/orchestrator/tui_hooks.go @@ -4,4 +4,3 @@ import "github.com/tis24dev/proxsave/internal/tui" // newTUIApp is an injection point for tests. Production uses tui.NewApp. 
var newTUIApp = tui.NewApp - diff --git a/internal/orchestrator/tui_simulation_test.go b/internal/orchestrator/tui_simulation_test.go index 7f52aef..27dd3d0 100644 --- a/internal/orchestrator/tui_simulation_test.go +++ b/internal/orchestrator/tui_simulation_test.go @@ -30,19 +30,19 @@ func withSimAppSequence(t *testing.T, keys []simKey) { app.SetScreen(screen) go func() { - // Wait for app.Run() to start event processing. - time.Sleep(50 * time.Millisecond) - for _, k := range keys { - mod := k.Mod - if mod == 0 { - mod = tcell.ModNone + // Wait for app.Run() to start event processing. + time.Sleep(50 * time.Millisecond) + for _, k := range keys { + mod := k.Mod + if mod == 0 { + mod = tcell.ModNone + } + screen.InjectKey(k.Key, k.R, mod) + time.Sleep(10 * time.Millisecond) } - screen.InjectKey(k.Key, k.R, mod) - time.Sleep(10 * time.Millisecond) - } - }() - return app -} + }() + return app + } t.Cleanup(func() { newTUIApp = orig diff --git a/internal/security/security.go b/internal/security/security.go index 00eea88..9edd76a 100644 --- a/internal/security/security.go +++ b/internal/security/security.go @@ -232,31 +232,31 @@ func (c *Checker) buildDependencyList() []dependencyEntry { } emailMethod := strings.ToLower(strings.TrimSpace(c.cfg.EmailDeliveryMethod)) - if emailMethod == "" { - emailMethod = "relay" - } - if emailMethod == "pmf" { - deps = append(deps, c.binaryDependency( - "proxmox-mail-forward", - []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, - true, - "email delivery method set to pmf (Proxmox Notifications via proxmox-mail-forward)", - )) - } else if emailMethod == "sendmail" { - deps = append(deps, c.binaryDependency( - "sendmail", - []string{"/usr/sbin/sendmail", "sendmail"}, - true, - "email delivery method set to sendmail (/usr/sbin/sendmail)", - )) - } else if emailMethod == "relay" && c.cfg.EmailFallbackSendmail { - deps = append(deps, c.binaryDependency( - "proxmox-mail-forward", - []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, - false, - "email relay fallback to pmf enabled (uses proxmox-mail-forward)", - )) - } + if emailMethod == "" { + emailMethod = "relay" + } + if emailMethod == "pmf" { + deps = append(deps, c.binaryDependency( + "proxmox-mail-forward", + []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, + true, + "email delivery method set to pmf (Proxmox Notifications via proxmox-mail-forward)", + )) + } else if emailMethod == "sendmail" { + deps = append(deps, c.binaryDependency( + "sendmail", + []string{"/usr/sbin/sendmail", "sendmail"}, + true, + "email delivery method set to sendmail (/usr/sbin/sendmail)", + )) + } else if emailMethod == "relay" && c.cfg.EmailFallbackSendmail { + deps = append(deps, c.binaryDependency( + "proxmox-mail-forward", + []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, + false, + "email relay fallback to pmf enabled (uses proxmox-mail-forward)", + )) + } if c.cfg.BackupCephConfig { deps = append(deps, c.binaryDependency("ceph", []string{"ceph"}, false, "Ceph configuration collection enabled")) diff --git a/internal/security/security_test.go b/internal/security/security_test.go index 6f98f4c..09998d0 100644 --- a/internal/security/security_test.go +++ b/internal/security/security_test.go @@ -143,30 +143,30 @@ func TestCheckDependenciesMissingRequiredAddsError(t *testing.T) { } } - func 
TestCheckDependenciesMissingOptionalAddsWarning(t *testing.T) { - cfg := &config.Config{ - CompressionType: types.CompressionNone, // only tar required - EmailDeliveryMethod: "relay", - EmailFallbackSendmail: true, // pmf becomes optional dependency (relay fallback) - } - checker := newCheckerForTest(cfg, stubLookPath(map[string]bool{ - "tar": true, // present - // proxmox-mail-forward missing -> warning - })) +func TestCheckDependenciesMissingOptionalAddsWarning(t *testing.T) { + cfg := &config.Config{ + CompressionType: types.CompressionNone, // only tar required + EmailDeliveryMethod: "relay", + EmailFallbackSendmail: true, // pmf becomes optional dependency (relay fallback) + } + checker := newCheckerForTest(cfg, stubLookPath(map[string]bool{ + "tar": true, // present + // proxmox-mail-forward missing -> warning + })) checker.checkDependencies() if got := checker.result.WarningCount(); got != 1 { t.Fatalf("expected 1 warning, got %d issues=%+v", got, checker.result.Issues) } - msg := checker.result.Issues[0].Message - if !strings.Contains(msg, "Optional dependency") || !strings.Contains(msg, "proxmox-mail-forward") { - t.Fatalf("unexpected warning message: %s", msg) - } - if checker.result.ErrorCount() != 0 { - t.Fatalf("expected no errors, got %d", checker.result.ErrorCount()) - } + msg := checker.result.Issues[0].Message + if !strings.Contains(msg, "Optional dependency") || !strings.Contains(msg, "proxmox-mail-forward") { + t.Fatalf("unexpected warning message: %s", msg) } + if checker.result.ErrorCount() != 0 { + t.Fatalf("expected no errors, got %d", checker.result.ErrorCount()) + } +} func TestParseSSLineProgramExtraction(t *testing.T) { line := `tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=1234,fd=3))` diff --git a/internal/support/support_test.go b/internal/support/support_test.go index 107d1fc..f8d303e 100644 --- a/internal/support/support_test.go +++ b/internal/support/support_test.go @@ -25,9 +25,9 @@ type fakeNotifier struct { err error } -func (f *fakeNotifier) Name() string { return "fake-email" } -func (f *fakeNotifier) IsEnabled() bool { return f.enabled } -func (f *fakeNotifier) IsCritical() bool { return false } +func (f *fakeNotifier) Name() string { return "fake-email" } +func (f *fakeNotifier) IsEnabled() bool { return f.enabled } +func (f *fakeNotifier) IsCritical() bool { return false } func (f *fakeNotifier) Send(ctx context.Context, data *notify.NotificationData) (*notify.NotificationResult, error) { f.sent++ f.last = data @@ -84,9 +84,9 @@ func TestRunIntro_DeclinedConsent(t *testing.T) { func TestRunIntro_FullFlowWithRetries(t *testing.T) { withStdinFile(t, strings.Join([]string{ - "y", // accept - "y", // has issue - "", // empty nickname -> retry + "y", // accept + "y", // has issue + "", // empty nickname -> retry "user", // nickname "abc", // invalid issue (missing #) "#no", // invalid issue (non-numeric) From faba5c763b415d693f9e5170b37629abb1886f97 Mon Sep 17 00:00:00 2001 From: Damiano <71268257+tis24dev@users.noreply.github.com> Date: Fri, 20 Feb 2026 16:59:01 +0100 Subject: [PATCH 24/24] Update docs: PXAR config, lockfile, templates Adjust documentation and template comments around PXAR scanning and locking behavior. Changes include: - docs/BACKUP_ENV_MAPPING.md: update legacy mappings for PXAR-related env vars (PXAR_STOP_ON_CAP, BACKUP_SMALL_PXAR, MAX_PXAR_SIZE) and clarify PXAR metadata sampling semantics. 
- docs/CONFIGURATION.md: remove several low-level PXAR tuning options from the public config example, simplify PXAR include pattern default, and add a note that include/exclude patterns are reused for PVE datastore sampling. - docs/TROUBLESHOOTING.md: clarify the lock file format and behavior (pid/host/time and PID liveness checks to avoid stuck locks). - internal/config/templates/backup.env: update PXAR file pattern comments (localized phrasing) and clarify defaults. These edits clean up deprecated/legacy config surface, clarify runtime behavior, and harmonize documentation with the Go implementation. --- docs/BACKUP_ENV_MAPPING.md | 6 +++--- docs/CONFIGURATION.md | 24 ++++++++++-------------- docs/TROUBLESHOOTING.md | 2 +- internal/config/templates/backup.env | 4 ++-- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index 7c97ccd..e10ec58 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -68,7 +68,6 @@ PORT_WHITELIST = SAME PVE_BACKUP_INCLUDE_PATTERN = SAME PVE_CLUSTER_PATH = SAME PVE_CONFIG_PATH = SAME -PXAR_STOP_ON_CAP = SAME RCLONE_BANDWIDTH_LIMIT = SAME RCLONE_FLAGS = SAME SECONDARY_LOG_PATH = SAME @@ -145,17 +144,18 @@ STORAGE_WARNING_THRESHOLD_SECONDARY = SEMANTIC CHANGE → MIN_DISK_SPACE_SECONDA AUTO_DETECT_DATASTORES = LEGACY (Bash only, auto-detect handled internally in Go) BACKUP_COROSYNC_CONFIG = LEGACY (Go always uses COROSYNC_CONFIG_PATH / cluster) -BACKUP_SMALL_PXAR = LEGACY (in Go, PXAR tuning is more granular via PXAR_*_*) +BACKUP_SMALL_PXAR = LEGACY (no equivalent in Go; PXAR metadata sampling is bounded) CLOUD_BACKUP_REQUIRED = LEGACY (secondary is always optional = warning only, non-blocking) CLOUD_PARALLEL_UPLOAD_TIMEOUT = LEGACY (in Go, timeouts are RCLONE_TIMEOUT_*) ENABLE_EMOJI_LOG = LEGACY (log formatting handled internally in Go) ENABLE_LOG_MANAGEMENT = LEGACY (log management in Go via LogPath/retention) MAX_CLOUD_LOGS = LEGACY (Bash only; in Go log retention follows MAX_CLOUD_BACKUPS/CloudRetentionDays) MAX_LOCAL_LOGS = LEGACY (Bash only; in Go log retention follows MAX_LOCAL_BACKUPS/LocalRetentionDays) -MAX_PXAR_SIZE = LEGACY (in Go there are PXAR_SCAN_MAX_ROOTS / budget, not the same semantics) +MAX_PXAR_SIZE = LEGACY (no equivalent in Go) MAX_SECONDARY_LOGS = LEGACY (Bash only; in Go log retention follows MAX_SECONDARY_BACKUPS/SecondaryRetentionDays) MIN_BASH_VERSION = LEGACY (specific only to Bash script) MULTI_STORAGE_PARALLEL = LEGACY (in Go there is parallel storage management, not controlled by this variable) +PXAR_STOP_ON_CAP = LEGACY (Go no longer uses this tuning knob) REMOVE_UNAUTHORIZED_FILES = LEGACY (in Go there is no hard delete flag; checks are more conservative) SECONDARY_BACKUP_REQUIRED = LEGACY (secondary is always optional = warning only, non-blocking) SKIP_CLOUD_VERIFICATION = LEGACY (verifications always performed) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 8db9ab0..fddead4 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -958,13 +958,13 @@ BACKUP_PVE_SCHEDULES=true # Cron schedules # Replication BACKUP_PVE_REPLICATION=true # VM/CT replication config - # PVE backup files - BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump - PVESH_TIMEOUT=15 # Timeout (seconds) for each `pvesh` call (0=disabled) - FS_IO_TIMEOUT=30 # Timeout (seconds) for filesystem probes on storages (stat/readdir/statfs). 
Helps avoid hangs on unreachable network mounts (0=disabled) - BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only - MAX_PVE_BACKUP_SIZE=100M # Max size for "small" backups - PVE_BACKUP_INCLUDE_PATTERN= # Glob patterns to include +# PVE backup files +BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump +PVESH_TIMEOUT=15 # Timeout (seconds) for each `pvesh` call (0=disabled) +FS_IO_TIMEOUT=30 # Timeout (seconds) for filesystem probes on storages (stat/readdir/statfs). Helps avoid hangs on unreachable network mounts (0=disabled) +BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only +MAX_PVE_BACKUP_SIZE=100M # Max size for "small" backups +PVE_BACKUP_INCLUDE_PATTERN= # Glob patterns to include # Ceph configuration BACKUP_CEPH_CONFIG=false # Ceph cluster config @@ -1024,13 +1024,7 @@ BACKUP_PRUNE_SCHEDULES=true # Retention prune schedules # PXAR metadata scanning PXAR_SCAN_ENABLE=false # Enable PXAR file metadata collection PXAR_SCAN_DS_CONCURRENCY=3 # Datastores scanned in parallel -PXAR_SCAN_INTRA_CONCURRENCY=4 # Workers per datastore -PXAR_SCAN_FANOUT_LEVEL=2 # Directory depth for fan-out -PXAR_SCAN_MAX_ROOTS=2048 # Max worker roots per datastore -PXAR_STOP_ON_CAP=false # Stop enumeration at max roots -PXAR_ENUM_READDIR_WORKERS=4 # Parallel ReadDir workers -PXAR_ENUM_BUDGET_MS=0 # Time budget for enumeration (0=disabled) -PXAR_FILE_INCLUDE_PATTERN= # Include patterns (default: *.pxar, catalog.pxar*) +PXAR_FILE_INCLUDE_PATTERN= # Include patterns (default: *.pxar, *.pxar.*, catalog.pxar*) PXAR_FILE_EXCLUDE_PATTERN= # Exclude patterns (e.g., *.tmp, *.lock) ``` @@ -1038,6 +1032,8 @@ PXAR_FILE_EXCLUDE_PATTERN= # Exclude patterns (e.g., *.tmp, *.lock) **PXAR scanning**: Collects metadata from Proxmox Backup Server .pxar archives. +**Note**: `PXAR_FILE_INCLUDE_PATTERN` and `PXAR_FILE_EXCLUDE_PATTERN` are also reused for file sampling in PVE datastore metadata. Leave them empty to use the built-in defaults per platform. + ### Override Collection Paths ```bash diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 524fe09..04c7e84 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -1021,7 +1021,7 @@ A: Use `--dry-run` mode: `./build/proxsave --dry-run --log-level debug` A: Update your configuration: `./build/proxsave --upgrade-config` **Q: Can I run backup while another backup is in progress?** -A: No. Use a lock file (`BACKUP_PATH/.backup.lock`) to prevent concurrent runs. +A: No. Proxsave uses a lock file (`BACKUP_PATH/.backup.lock`) to prevent concurrent runs. The lock stores `pid/host/time`; on the same host, proxsave checks PID liveness to avoid “stuck” locks after an interrupted run. **Q: Backup hangs during PVE datastore detection when a network storage is unreachable.** A: Set `FS_IO_TIMEOUT` to cap how long proxsave waits for filesystem probes (stat/readdir/statfs), and `PVESH_TIMEOUT` to cap `pvesh` calls. This reduces the likelihood of indefinite hangs when a storage becomes unreachable mid-run. 
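The lock behavior clarified in the TROUBLESHOOTING.md change above (a `pid/host/time` record plus a same-host PID liveness check) can be sketched in Go roughly as follows; the lock layout, the helper name `lockIsStale`, and the error handling are illustrative assumptions, not proxsave's actual implementation.

```go
// Illustrative sketch only; assumed lock layout: "<pid> <host> <unix-time>".
package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
	"syscall"
)

// lockIsStale reports whether the lock file was left behind by a process that
// is no longer alive on this host. Locks held by other hosts are never treated
// as stale, because PID liveness cannot be verified remotely.
func lockIsStale(path string) (bool, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return false, err
	}
	fields := strings.Fields(string(data))
	if len(fields) < 2 {
		return true, nil // unparseable lock: safe to treat as stale
	}
	pid, err := strconv.Atoi(fields[0])
	if err != nil {
		return true, nil
	}
	host, _ := os.Hostname()
	if fields[1] != host {
		return false, nil // different host: keep the lock
	}
	proc, err := os.FindProcess(pid) // on Unix this never fails
	if err != nil {
		return true, nil
	}
	// Signal 0 checks process existence without delivering a real signal.
	// A robust version would distinguish ESRCH from permission errors.
	if err := proc.Signal(syscall.Signal(0)); err != nil {
		return true, nil
	}
	return false, nil
}

func main() {
	stale, err := lockIsStale("/var/backups/.backup.lock")
	fmt.Printf("stale=%v err=%v\n", stale, err)
}
```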
diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index ee03cfb..9c3ec39 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -309,8 +309,8 @@ BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP BACKUP_PRUNE_SCHEDULES=true PXAR_SCAN_ENABLE=false PXAR_SCAN_DS_CONCURRENCY=3 # Datastores scanned in parallel for PXAR metadata -PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns to locate PXAR files (default: *.pxar,*.pxar.*,catalog.pxar*) -PXAR_FILE_EXCLUDE_PATTERN= # Patterns to exclude while sampling files (e.g. *.tmp, *.lock) +PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns for sampling archive files (PBS: *.pxar/catalog.pxar*, PVE: vzdump backups). Leave empty to use the defaults. +PXAR_FILE_EXCLUDE_PATTERN= # Patterns to exclude during sampling (applies to both PBS and PVE; e.g. *.tmp, *.lock) # Override collection paths (use only if directories differ from defaults) # Note: $VAR / ${VAR} expansion resolves keys from this file too (no need to export).
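The shared include/exclude pattern semantics referenced in the CONFIGURATION.md note and the backup.env comments (space- or comma-separated globs, with an empty value falling back to platform defaults) could be implemented roughly as below; the helper names `splitPatterns`/`shouldSample` and the exact matching rules are assumptions for illustration, not proxsave's code.

```go
// Illustrative sketch only; pattern parsing and matching rules are assumed.
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// splitPatterns parses a space- or comma-separated pattern list such as the
// value of PXAR_FILE_INCLUDE_PATTERN / PXAR_FILE_EXCLUDE_PATTERN.
func splitPatterns(raw string, defaults []string) []string {
	fields := strings.FieldsFunc(raw, func(r rune) bool { return r == ',' || r == ' ' })
	out := make([]string, 0, len(fields))
	for _, f := range fields {
		if f = strings.TrimSpace(f); f != "" {
			out = append(out, f)
		}
	}
	if len(out) == 0 {
		return defaults // empty value: use the built-in platform defaults
	}
	return out
}

// shouldSample applies include patterns first, then exclude patterns, against
// the file's base name (e.g. "catalog.pxar.didx" or a vzdump archive name).
func shouldSample(name string, include, exclude []string) bool {
	base := filepath.Base(name)
	matched := false
	for _, p := range include {
		if ok, _ := filepath.Match(p, base); ok {
			matched = true
			break
		}
	}
	if !matched {
		return false
	}
	for _, p := range exclude {
		if ok, _ := filepath.Match(p, base); ok {
			return false
		}
	}
	return true
}

func main() {
	// Defaults here mirror the documented PBS defaults (*.pxar, *.pxar.*, catalog.pxar*).
	include := splitPatterns("", []string{"*.pxar", "*.pxar.*", "catalog.pxar*"})
	exclude := splitPatterns("*.tmp, *.lock", nil)
	fmt.Println(shouldSample("catalog.pxar.didx", include, exclude)) // true
	fmt.Println(shouldSample("backup.pxar.tmp", include, exclude))   // false: excluded by *.tmp
}
```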