diff --git a/Makefile b/Makefile index 3b936c9..e3041d4 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,25 @@ COVERAGE_THRESHOLD ?= 50.0 # Build del progetto build: @echo "Building proxsave..." - @VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev); \ + @VERSION=$$( \ + if git describe --tags --exact-match >/dev/null 2>&1 && [ -z "$$(git status --porcelain 2>/dev/null)" ]; then \ + git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev; \ + else \ + desc=$$(git describe --tags --long --dirty --always 2>/dev/null || echo dev); \ + dirty=""; \ + case "$$desc" in *-dirty) dirty=".dirty"; desc=$${desc%-dirty};; esac; \ + sha_part=$${desc##*-}; \ + sha=$${sha_part#g}; \ + rest=$${desc%-*}; \ + n=$${rest##*-}; \ + tag=$${rest%-*}; \ + if [ "$$tag" = "$$desc" ] || [ -z "$$n" ] || [ -z "$$sha" ] || [ "$$sha_part" = "$$desc" ]; then \ + echo "0.0.0-dev.0+g$${desc}$$dirty"; \ + else \ + echo "$$tag-dev.$$n+g$$sha$$dirty"; \ + fi; \ + fi \ + ); \ COMMIT=$$(git rev-parse --short HEAD 2>/dev/null || echo dev); \ BUILD_TIME=$$(date -u +"%Y-%m-%dT%H:%M:%SZ"); \ go build -ldflags="-X 'main.buildTime=$$BUILD_TIME' -X 'github.com/tis24dev/proxsave/internal/version.Version=$$VERSION' -X 'github.com/tis24dev/proxsave/internal/version.Commit=$$COMMIT' -X 'github.com/tis24dev/proxsave/internal/version.Date=$$BUILD_TIME'" -o build/proxsave ./cmd/proxsave @@ -13,7 +31,25 @@ build: # Build ottimizzato per release build-release: @echo "Building release..." - @VERSION=$$(git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev); \ + @VERSION=$$( \ + if git describe --tags --exact-match >/dev/null 2>&1 && [ -z "$$(git status --porcelain 2>/dev/null)" ]; then \ + git describe --tags --abbrev=0 2>/dev/null || echo 0.0.0-dev; \ + else \ + desc=$$(git describe --tags --long --dirty --always 2>/dev/null || echo dev); \ + dirty=""; \ + case "$$desc" in *-dirty) dirty=".dirty"; desc=$${desc%-dirty};; esac; \ + sha_part=$${desc##*-}; \ + sha=$${sha_part#g}; \ + rest=$${desc%-*}; \ + n=$${rest##*-}; \ + tag=$${rest%-*}; \ + if [ "$$tag" = "$$desc" ] || [ -z "$$n" ] || [ -z "$$sha" ] || [ "$$sha_part" = "$$desc" ]; then \ + echo "0.0.0-dev.0+g$${desc}$$dirty"; \ + else \ + echo "$$tag-dev.$$n+g$$sha$$dirty"; \ + fi; \ + fi \ + ); \ COMMIT=$$(git rev-parse --short HEAD 2>/dev/null || echo dev); \ BUILD_TIME=$$(date -u +"%Y-%m-%dT%H:%M:%SZ"); \ go build -ldflags="-s -w -X 'main.buildTime=$$BUILD_TIME' -X 'github.com/tis24dev/proxsave/internal/version.Version=$$VERSION' -X 'github.com/tis24dev/proxsave/internal/version.Commit=$$COMMIT' -X 'github.com/tis24dev/proxsave/internal/version.Date=$$BUILD_TIME'" -o build/proxsave ./cmd/proxsave diff --git a/cmd/prefilter-manual/main.go b/cmd/prefilter-manual/main.go new file mode 100644 index 0000000..6f0d1a8 --- /dev/null +++ b/cmd/prefilter-manual/main.go @@ -0,0 +1,59 @@ +package main + +import ( + "context" + "flag" + "os" + "path/filepath" + "strings" + + "github.com/tis24dev/proxsave/internal/backup" + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func parseLogLevel(raw string) types.LogLevel { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "debug": + return types.LogLevelDebug + case "info", "": + return types.LogLevelInfo + case "warning", "warn": + return types.LogLevelWarning + case "error": + return types.LogLevelError + default: + return types.LogLevelInfo + } +} + +func main() { + var ( + root string + maxSize int64 + levelLabel string + ) + + flag.StringVar(&root, "root", 
"/tmp/test_prefilter", "Root directory to run prefilter on") + flag.Int64Var(&maxSize, "max-size", 8*1024*1024, "Max file size (bytes) to prefilter") + flag.StringVar(&levelLabel, "log-level", "info", "Log level: debug|info|warn|error") + flag.Parse() + + root = filepath.Clean(strings.TrimSpace(root)) + if root == "" || root == "." { + root = string(os.PathSeparator) + } + + logger := logging.New(parseLogLevel(levelLabel), false) + logger.SetOutput(os.Stdout) + + cfg := backup.OptimizationConfig{ + EnablePrefilter: true, + PrefilterMaxFileSizeBytes: maxSize, + } + + if err := backup.ApplyOptimizations(context.Background(), logger, root, cfg); err != nil { + logger.Error("Prefilter failed: %v", err) + os.Exit(1) + } +} diff --git a/cmd/proxsave/main.go b/cmd/proxsave/main.go index 7241cd8..1d51c69 100644 --- a/cmd/proxsave/main.go +++ b/cmd/proxsave/main.go @@ -1057,6 +1057,7 @@ func run() int { checkerConfig.MinDiskPrimaryGB = cfg.MinDiskPrimaryGB checkerConfig.MinDiskSecondaryGB = cfg.MinDiskSecondaryGB checkerConfig.MinDiskCloudGB = cfg.MinDiskCloudGB + checkerConfig.FsIoTimeout = time.Duration(cfg.FsIoTimeoutSeconds) * time.Second checkerConfig.DryRun = dryRun checkerDone := logging.DebugStart(logger, "pre-backup check config", "dry_run=%v", dryRun) if err := checkerConfig.Validate(); err != nil { diff --git a/cmd/proxsave/runtime_helpers.go b/cmd/proxsave/runtime_helpers.go index 5312a73..b95d90e 100644 --- a/cmd/proxsave/runtime_helpers.go +++ b/cmd/proxsave/runtime_helpers.go @@ -273,13 +273,13 @@ func detectFilesystemInfo(ctx context.Context, backend storage.Storage, path str return nil, nil } - if !fsInfo.SupportsOwnership { - if backend != nil && backend.Location() == storage.LocationCloud { - logger.Debug("%s [%s] does not support ownership changes (cloud remote); chown/chmod already disabled", path, fsInfo.Type) - } else { - logger.Info("%s [%s] does not support ownership changes; chown/chmod will be skipped", path, fsInfo.Type) - } + if !fsInfo.SupportsOwnership { + if backend != nil && backend.Location() == storage.LocationCloud { + logger.Debug("%s [%s] does not support ownership changes (cloud remote); chown/chmod already disabled", path, fsInfo.Type) + } else { + logger.Info("%s [%s] does not support ownership changes; chown/chmod will be skipped", path, fsInfo.Type) } + } return fsInfo, nil } diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index 9d1057e..e10ec58 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -68,7 +68,6 @@ PORT_WHITELIST = SAME PVE_BACKUP_INCLUDE_PATTERN = SAME PVE_CLUSTER_PATH = SAME PVE_CONFIG_PATH = SAME -PXAR_STOP_ON_CAP = SAME RCLONE_BANDWIDTH_LIMIT = SAME RCLONE_FLAGS = SAME SECONDARY_LOG_PATH = SAME @@ -88,12 +87,17 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. +PVESH_TIMEOUT = NEW (Go-only) → Timeout (seconds) for each `pvesh` command execution (0=disabled). +FS_IO_TIMEOUT = NEW (Go-only) → Timeout (seconds) for filesystem probes (stat/readdir/statfs) on storages (0=disabled). Helps avoid hangs on unreachable network mounts. +NOTE: PBS restore behavior is selected interactively during `--restore` and is intentionally not configured via `backup.env`. BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). 
BACKUP_PBS_ACME_ACCOUNTS = NEW (Go-only) → Collect `acme/accounts.cfg` and ACME account snapshots (PBS). BACKUP_PBS_ACME_PLUGINS = NEW (Go-only) → Collect `acme/plugins.cfg` and ACME plugin snapshots (PBS). BACKUP_PBS_METRIC_SERVERS = NEW (Go-only) → Collect `metricserver.cfg` (PBS). BACKUP_PBS_TRAFFIC_CONTROL = NEW (Go-only) → Collect `traffic-control.cfg` and traffic-control snapshots (PBS). +BACKUP_PBS_NOTIFICATIONS = NEW (Go-only) → Collect `notifications.cfg` and notification snapshots (PBS). +BACKUP_PBS_NOTIFICATIONS_PRIV = NEW (Go-only) → Collect `notifications-priv.cfg` (PBS notification secrets/credentials). BACKUP_PBS_NETWORK_CONFIG = NEW (Go-only) → Collect `network.cfg` and network snapshots (PBS), independent from BACKUP_NETWORK_CONFIGS (system). ## Renamed variables / Supported aliases in Go @@ -140,17 +144,18 @@ STORAGE_WARNING_THRESHOLD_SECONDARY = SEMANTIC CHANGE → MIN_DISK_SPACE_SECONDA AUTO_DETECT_DATASTORES = LEGACY (Bash only, auto-detect handled internally in Go) BACKUP_COROSYNC_CONFIG = LEGACY (Go always uses COROSYNC_CONFIG_PATH / cluster) -BACKUP_SMALL_PXAR = LEGACY (in Go, PXAR tuning is more granular via PXAR_*_*) +BACKUP_SMALL_PXAR = LEGACY (no equivalent in Go; PXAR metadata sampling is bounded) CLOUD_BACKUP_REQUIRED = LEGACY (secondary is always optional = warning only, non-blocking) CLOUD_PARALLEL_UPLOAD_TIMEOUT = LEGACY (in Go, timeouts are RCLONE_TIMEOUT_*) ENABLE_EMOJI_LOG = LEGACY (log formatting handled internally in Go) ENABLE_LOG_MANAGEMENT = LEGACY (log management in Go via LogPath/retention) MAX_CLOUD_LOGS = LEGACY (Bash only; in Go log retention follows MAX_CLOUD_BACKUPS/CloudRetentionDays) MAX_LOCAL_LOGS = LEGACY (Bash only; in Go log retention follows MAX_LOCAL_BACKUPS/LocalRetentionDays) -MAX_PXAR_SIZE = LEGACY (in Go there are PXAR_SCAN_MAX_ROOTS / budget, not the same semantics) +MAX_PXAR_SIZE = LEGACY (no equivalent in Go) MAX_SECONDARY_LOGS = LEGACY (Bash only; in Go log retention follows MAX_SECONDARY_BACKUPS/SecondaryRetentionDays) MIN_BASH_VERSION = LEGACY (specific only to Bash script) MULTI_STORAGE_PARALLEL = LEGACY (in Go there is parallel storage management, not controlled by this variable) +PXAR_STOP_ON_CAP = LEGACY (Go no longer uses this tuning knob) REMOVE_UNAUTHORIZED_FILES = LEGACY (in Go there is no hard delete flag; checks are more conservative) SECONDARY_BACKUP_REQUIRED = LEGACY (secondary is always optional = warning only, non-blocking) SKIP_CLOUD_VERIFICATION = LEGACY (verifications always performed) diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index 4aea693..eae8335 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -789,6 +789,9 @@ CONFIG_FILE=/etc/pbs/prod.env ./build/proxsave # Force dry-run mode DRY_RUN=true ./build/proxsave +# PBS restore behavior +# Selected interactively during `--restore` on PBS hosts (Merge vs Clean 1:1). + # Set debug level DEBUG_LEVEL=extreme ./build/proxsave --log-level debug diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 33deac6..fddead4 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -6,6 +6,7 @@ Complete reference for all 200+ configuration variables in `configs/backup.env`. 
- [Configuration File Location](#configuration-file-location) - [General Settings](#general-settings) +- [Restore (PBS)](#restore-pbs) - [Security Settings](#security-settings) - [Disk Space](#disk-space) - [Storage Paths](#storage-paths) @@ -74,6 +75,25 @@ PROFILING_ENABLED=true # true | false (profiles written under LOG_PA --- +## Restore (PBS) + +PBS restore behavior is chosen **interactively at restore time** on PBS hosts (not via `backup.env`). + +You will be asked to choose a behavior: +- **Merge (existing PBS)**: intended for restoring onto an already operational PBS; ProxSave applies supported PBS categories via `proxmox-backup-manager` without deleting existing objects that are not in the backup. +- **Clean 1:1 (fresh PBS install)**: intended for restoring onto a new, clean PBS; ProxSave attempts to make supported PBS objects match the backup (may remove objects that exist on the system but are not in the backup). + +ProxSave applies supported PBS staged categories via API automatically (and may fall back to file-based staged apply only in **Clean 1:1** mode). + +**Current API coverage**: +- Node + traffic control (`pbs_host`) +- Datastores + S3 endpoints (`datastore_pbs`) +- Remotes (`pbs_remotes`) +- Jobs (sync/verify/prune) (`pbs_jobs`) +- Notifications endpoints/matchers (`pbs_notifications`) + +--- + ## Security Settings ```bash @@ -321,7 +341,29 @@ PREFILTER_MAX_FILE_SIZE_MB=8 # Skip prefilter for files >8MB - **Smart chunking**: Splits large files for parallel processing - **Deduplication**: Detects duplicate data blocks (reduces storage) -- **Prefilter**: Analyzes small files before compression (optimizes algorithm selection) +- **Prefilter**: Applies safe, semantic-preserving normalization to small text/JSON files to improve compression (e.g. removes CR from CRLF line endings and minifies JSON). It does **not** reorder, de-indent, or strip structured configuration files, and it avoids touching Proxmox/PBS structured config paths (e.g. `etc/pve/**`, `etc/proxmox-backup/**`). + +### Prefilter (`ENABLE_PREFILTER`) — details and risks + +**What it does** (on the *staged* backup tree, before compression): +- Removes `\r` from CRLF text files (`.txt`, `.log`, `.md`, `.conf`, `.cfg`, `.ini`) to normalize line endings +- Minifies JSON (`.json`) while keeping valid JSON semantics + +**What it does not do**: +- It does **not** reorder lines, remove indentation, or otherwise rewrite whitespace/ordering-sensitive structured configs. +- It does **not** follow symlinks (symlinks are skipped). +- It skips Proxmox/PBS structured configuration paths where formatting/order matters, such as: + - `etc/pve/**` + - `etc/proxmox-backup/**` + - `etc/systemd/system/**` + - `etc/ssh/**` + - `etc/pam.d/**` + +**Why you might disable it** (even though it's safe): +- If you need maximum fidelity (bit-for-bit) of text/JSON formatting as originally collected (CRLF preservation, JSON pretty-printing, etc.) +- If you prefer the most conservative pipeline possible (forensics/compliance) + +**Important**: Prefilter never edits files on the host system — it only operates on the temporary staging directory that will be archived. --- @@ -918,6 +960,8 @@ BACKUP_PVE_REPLICATION=true # VM/CT replication config # PVE backup files BACKUP_PVE_BACKUP_FILES=true # Include backup files from /var/lib/vz/dump +PVESH_TIMEOUT=15 # Timeout (seconds) for each `pvesh` call (0=disabled) +FS_IO_TIMEOUT=30 # Timeout (seconds) for filesystem probes on storages (stat/readdir/statfs). 
Helps avoid hangs on unreachable network mounts (0=disabled) BACKUP_SMALL_PVE_BACKUPS=false # Include small backups only MAX_PVE_BACKUP_SIZE=100M # Max size for "small" backups PVE_BACKUP_INCLUDE_PATTERN= # Glob patterns to include @@ -938,6 +982,24 @@ BACKUP_VM_CONFIGS=true # VM/CT config files # PBS datastore configs BACKUP_DATASTORE_CONFIGS=true # Datastore definitions +# S3 endpoints (used by S3 datastores) +BACKUP_PBS_S3_ENDPOINTS=true # s3.cfg (S3 endpoints, used by S3 datastores) + +# Node/global config +BACKUP_PBS_NODE_CONFIG=true # node.cfg (global PBS settings) + +# ACME +BACKUP_PBS_ACME_ACCOUNTS=true # acme/accounts.cfg +BACKUP_PBS_ACME_PLUGINS=true # acme/plugins.cfg + +# Integrations +BACKUP_PBS_METRIC_SERVERS=true # metricserver.cfg +BACKUP_PBS_TRAFFIC_CONTROL=true # traffic-control.cfg + +# Notifications +BACKUP_PBS_NOTIFICATIONS=true # notifications.cfg (targets/matchers/endpoints) +BACKUP_PBS_NOTIFICATIONS_PRIV=true # notifications-priv.cfg (secrets/credentials for endpoints) + # User and permissions BACKUP_USER_CONFIGS=true # PBS users and tokens @@ -953,19 +1015,16 @@ BACKUP_VERIFICATION_JOBS=true # Backup verification schedules # Tape backup BACKUP_TAPE_CONFIGS=true # Tape library configuration +# Network configuration (PBS) +BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP_NETWORK_CONFIGS (system) + # Prune schedules BACKUP_PRUNE_SCHEDULES=true # Retention prune schedules # PXAR metadata scanning PXAR_SCAN_ENABLE=false # Enable PXAR file metadata collection PXAR_SCAN_DS_CONCURRENCY=3 # Datastores scanned in parallel -PXAR_SCAN_INTRA_CONCURRENCY=4 # Workers per datastore -PXAR_SCAN_FANOUT_LEVEL=2 # Directory depth for fan-out -PXAR_SCAN_MAX_ROOTS=2048 # Max worker roots per datastore -PXAR_STOP_ON_CAP=false # Stop enumeration at max roots -PXAR_ENUM_READDIR_WORKERS=4 # Parallel ReadDir workers -PXAR_ENUM_BUDGET_MS=0 # Time budget for enumeration (0=disabled) -PXAR_FILE_INCLUDE_PATTERN= # Include patterns (default: *.pxar, catalog.pxar*) +PXAR_FILE_INCLUDE_PATTERN= # Include patterns (default: *.pxar, *.pxar.*, catalog.pxar*) PXAR_FILE_EXCLUDE_PATTERN= # Exclude patterns (e.g., *.tmp, *.lock) ``` @@ -973,6 +1032,8 @@ PXAR_FILE_EXCLUDE_PATTERN= # Exclude patterns (e.g., *.tmp, *.lock) **PXAR scanning**: Collects metadata from Proxmox Backup Server .pxar archives. +**Note**: `PXAR_FILE_INCLUDE_PATTERN` and `PXAR_FILE_EXCLUDE_PATTERN` are also reused for file sampling in PVE datastore metadata. Leave them empty to use the built-in defaults per platform. + ### Override Collection Paths ```bash diff --git a/docs/RESTORE_DIAGRAMS.md b/docs/RESTORE_DIAGRAMS.md index 5007fb8..3dca90a 100644 --- a/docs/RESTORE_DIAGRAMS.md +++ b/docs/RESTORE_DIAGRAMS.md @@ -138,6 +138,8 @@ flowchart TD style CheckboxMenu fill:#87CEEB ``` +**Note (PBS)**: ProxSave applies supported PBS staged categories via `proxmox-backup-manager` by default. In **Clean 1:1** mode it may fall back to writing staged `*.cfg` files back to `/etc/proxmox-backup` when API apply is unavailable or fails. 
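+
+The sketch below only illustrates the "API apply first, file fallback only in Clean 1:1" rule described in the note above; the type and function names are hypothetical and do not match ProxSave's actual implementation (the real apply engines live under `internal/orchestrator/`).
+
+```go
+// Hypothetical sketch of the fallback rule: API apply is always attempted first,
+// and raw file writes to /etc/proxmox-backup are used only in Clean 1:1 mode.
+package main
+
+import (
+	"errors"
+	"fmt"
+	"path/filepath"
+)
+
+type restoreMode int
+
+const (
+	modeMerge   restoreMode = iota // create/update only, never delete existing objects
+	modeClean11                    // 1:1 reconciliation, may remove objects not in the backup
+)
+
+// applyViaAPI stands in for the proxmox-backup-manager based apply engine (illustrative stub).
+func applyViaAPI(stageDir string) error {
+	return errors.New("API apply unavailable in this sketch")
+}
+
+// copyStagedCfg stands in for the file-based staged apply (atomic replace of *.cfg files).
+func copyStagedCfg(src, dst string) error {
+	fmt.Printf("would copy staged configs from %s to %s\n", src, dst)
+	return nil
+}
+
+// applyPBSStaged applies one staged PBS category from the staging directory.
+func applyPBSStaged(mode restoreMode, stageDir string) error {
+	if err := applyViaAPI(stageDir); err != nil {
+		if mode == modeClean11 {
+			return copyStagedCfg(filepath.Join(stageDir, "etc/proxmox-backup"), "/etc/proxmox-backup")
+		}
+		return err // Merge mode never falls back to raw file writes
+	}
+	return nil
+}
+
+func main() {
+	if err := applyPBSStaged(modeMerge, "/tmp/proxsave/restore-stage-example"); err != nil {
+		fmt.Println("merge apply skipped:", err)
+	}
+}
+```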
+ --- ## Service Management Flow diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index bbc30aa..bac43c2 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -86,7 +86,7 @@ Restore operations are organized into **20–22 categories** (PBS = 20, PVE = 22 Each category is handled in one of three ways: - **Normal**: extracted directly to `/` (system paths) after safety backup -- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or `pvesh`); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) +- **Staged**: extracted to `/tmp/proxsave/restore-stage-*` and then applied in a controlled way (file copy/validation or API apply: `pvesh`/`pveum` on PVE, `proxmox-backup-manager` on PBS); when staged files are written to system paths, ProxSave applies them **atomically** and enforces the final permissions/ownership (including for any created parent directories; not left to `umask`) - **Export-only**: extracted to an export directory for manual review (never written to system paths) ### PVE-Specific Categories (11 categories) @@ -107,17 +107,23 @@ Each category is handled in one of three ways: ### PBS-Specific Categories (9 categories) +**PBS staged apply behavior**: During restore on PBS, ProxSave prompts you to choose how to reconcile PBS objects: +- **Merge (existing PBS)**: intended for restoring onto an already operational PBS; applies supported PBS categories via `proxmox-backup-manager` without deleting existing objects that are not in the backup. +- **Clean 1:1 (fresh PBS install)**: intended for restoring onto a new, clean PBS; attempts to make supported PBS objects match the backup (may remove objects not in the backup). + +API apply is automatic for supported PBS staged categories; ProxSave may fall back to file-based staged apply only in **Clean 1:1** mode. + | Category | Name | Description | Paths | |----------|------|-------------|-------| | `pbs_config` | PBS Config Export | **Export-only** copy of /etc/proxmox-backup (never written to system) | `./etc/proxmox-backup/` | -| `pbs_host` | PBS Host & Integrations | **Staged** node settings, ACME, proxy, metric servers and traffic control | `./etc/proxmox-backup/node.cfg`
`./etc/proxmox-backup/proxy.cfg`
`./etc/proxmox-backup/acme/accounts.cfg`
`./etc/proxmox-backup/acme/plugins.cfg`
`./etc/proxmox-backup/metricserver.cfg`
`./etc/proxmox-backup/traffic-control.cfg` | -| `datastore_pbs` | PBS Datastore Configuration | **Staged** datastore definitions (incl. S3 endpoints) | `./etc/proxmox-backup/datastore.cfg`
`./etc/proxmox-backup/s3.cfg` | +| `pbs_host` | PBS Host & Integrations | **Staged** node settings, ACME, proxy, metric servers and traffic control (API/file apply) | `./etc/proxmox-backup/node.cfg`
`./etc/proxmox-backup/proxy.cfg`
`./etc/proxmox-backup/acme/accounts.cfg`
`./etc/proxmox-backup/acme/plugins.cfg`
`./etc/proxmox-backup/metricserver.cfg`
`./etc/proxmox-backup/traffic-control.cfg`
`./var/lib/proxsave-info/commands/pbs/node_config.json`
`./var/lib/proxsave-info/commands/pbs/acme_accounts.json`
`./var/lib/proxsave-info/commands/pbs/acme_plugins.json`
`./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json`
`./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json`
`./var/lib/proxsave-info/commands/pbs/traffic_control.json` | +| `datastore_pbs` | PBS Datastore Configuration | **Staged** datastore definitions (incl. S3 endpoints) (API/file apply) | `./etc/proxmox-backup/datastore.cfg`
`./etc/proxmox-backup/s3.cfg`
`./var/lib/proxsave-info/commands/pbs/datastore_list.json`
`./var/lib/proxsave-info/commands/pbs/datastore_*_status.json`
`./var/lib/proxsave-info/commands/pbs/s3_endpoints.json`
`./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json`
`./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json` | | `maintenance_pbs` | PBS Maintenance | Maintenance settings | `./etc/proxmox-backup/maintenance.cfg` | -| `pbs_jobs` | PBS Jobs | **Staged** sync/verify/prune jobs | `./etc/proxmox-backup/sync.cfg`
`./etc/proxmox-backup/verification.cfg`
`./etc/proxmox-backup/prune.cfg` | -| `pbs_remotes` | PBS Remotes | **Staged** remotes for sync/verify (may include credentials) | `./etc/proxmox-backup/remote.cfg` | -| `pbs_notifications` | PBS Notifications | **Staged** notification targets and matchers | `./etc/proxmox-backup/notifications.cfg`
`./etc/proxmox-backup/notifications-priv.cfg` | -| `pbs_access_control` | PBS Access Control | **Staged** access control + secrets restored 1:1 (root@pam safety rail) | `./etc/proxmox-backup/user.cfg`
`./etc/proxmox-backup/domains.cfg`
`./etc/proxmox-backup/acl.cfg`
`./etc/proxmox-backup/token.cfg`
`./etc/proxmox-backup/shadow.json`
`./etc/proxmox-backup/token.shadow`
`./etc/proxmox-backup/tfa.json` | -| `pbs_tape` | PBS Tape Backup | **Staged** tape config, jobs and encryption keys | `./etc/proxmox-backup/tape.cfg`
`./etc/proxmox-backup/tape-job.cfg`
`./etc/proxmox-backup/media-pool.cfg`
`./etc/proxmox-backup/tape-encryption-keys.json` | +| `pbs_jobs` | PBS Jobs | **Staged** sync/verify/prune jobs (API/file apply) | `./etc/proxmox-backup/sync.cfg`
`./etc/proxmox-backup/verification.cfg`
`./etc/proxmox-backup/prune.cfg`
`./var/lib/proxsave-info/commands/pbs/sync_jobs.json`
`./var/lib/proxsave-info/commands/pbs/verification_jobs.json`
`./var/lib/proxsave-info/commands/pbs/prune_jobs.json`
`./var/lib/proxsave-info/commands/pbs/gc_jobs.json` | +| `pbs_remotes` | PBS Remotes | **Staged** remotes for sync/verify (may include credentials) (API/file apply) | `./etc/proxmox-backup/remote.cfg`
`./var/lib/proxsave-info/commands/pbs/remote_list.json` | +| `pbs_notifications` | PBS Notifications | **Staged** notification targets and matchers (API/file apply) | `./etc/proxmox-backup/notifications.cfg`
`./etc/proxmox-backup/notifications-priv.cfg`
`./var/lib/proxsave-info/commands/pbs/notification_targets.json`
`./var/lib/proxsave-info/commands/pbs/notification_matchers.json`
`./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json` | +| `pbs_access_control` | PBS Access Control | **Staged** access control + secrets restored 1:1 (root@pam safety rail) | `./etc/proxmox-backup/user.cfg`
`./etc/proxmox-backup/domains.cfg`
`./etc/proxmox-backup/acl.cfg`
`./etc/proxmox-backup/token.cfg`
`./etc/proxmox-backup/shadow.json`
`./etc/proxmox-backup/token.shadow`
`./etc/proxmox-backup/tfa.json`
`./var/lib/proxsave-info/commands/pbs/user_list.json`
`./var/lib/proxsave-info/commands/pbs/realms_ldap.json`
`./var/lib/proxsave-info/commands/pbs/realms_ad.json`
`./var/lib/proxsave-info/commands/pbs/realms_openid.json`
`./var/lib/proxsave-info/commands/pbs/acl_list.json` | +| `pbs_tape` | PBS Tape Backup | **Staged** tape config, jobs and encryption keys | `./etc/proxmox-backup/tape.cfg`
`./etc/proxmox-backup/tape-job.cfg`
`./etc/proxmox-backup/media-pool.cfg`
`./etc/proxmox-backup/tape-encryption-keys.json`
`./var/lib/proxsave-info/commands/pbs/tape_drives.json`
`./var/lib/proxsave-info/commands/pbs/tape_changers.json`
`./var/lib/proxsave-info/commands/pbs/tape_pools.json` | ### Common Categories (11 categories) @@ -225,10 +231,10 @@ Select restore mode: - `zfs` - ZFS configuration **PBS Categories**: -- `datastore_pbs` - Datastore definitions (staged apply) +- `datastore_pbs` - Datastore definitions (staged apply; API preferred, file fallback in Clean 1:1) - `maintenance_pbs` - Maintenance settings -- `pbs_jobs` - Sync/verify/prune jobs (staged apply) -- `pbs_remotes` - Remotes for sync jobs (staged apply) +- `pbs_jobs` - Sync/verify/prune jobs (staged apply; API preferred, file fallback in Clean 1:1) +- `pbs_remotes` - Remotes for sync jobs (staged apply; API preferred, file fallback in Clean 1:1) - `filesystem` - /etc/fstab - `storage_stack` - Storage stack config (mount prerequisites) - `zfs` - ZFS configuration @@ -2371,6 +2377,28 @@ systemctl restart proxmox-backup proxmox-backup-proxy --- +**Issue: "Bad Request (400) parsing /etc/proxmox-backup/datastore.cfg ... duplicate property 'gc-schedule'"** + +**Cause**: `datastore.cfg` is malformed (multiple datastore definitions merged into a single block). This typically happens if the file lost its structure (header/order/indentation), leading PBS to interpret keys like `gc-schedule`, `notification-mode`, or `path` as duplicated **within the same datastore**. + +**Restore behavior**: +- ProxSave detects this condition during staged apply. +- If `var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json` is available in the backup, ProxSave will use its embedded snapshot of the original `datastore.cfg` to recover a valid configuration. +- If recovery is not possible, ProxSave will **leave the existing** `/etc/proxmox-backup/datastore.cfg` unchanged to avoid breaking PBS. + +**Manual diagnosis**: +```bash +nl -ba /etc/proxmox-backup/datastore.cfg | sed -n '1,120p' + +# Look for duplicate keys inside the same datastore block: +awk ' +/^datastore: /{ds=$2; delete seen} +/^[[:space:]]*[A-Za-z0-9-]+[[:space:]]+/{key=$1; if(seen[key]++) printf "DUP datastore=%s key=%s line=%d: %s\n", ds, key, NR, $0} +' /etc/proxmox-backup/datastore.cfg +``` + +--- + **Issue: "unable to read prune/verification job config ... syntax error (expected header)"** **Cause**: PBS job config files (`/etc/proxmox-backup/prune.cfg`, `/etc/proxmox-backup/verification.cfg`) are empty or malformed. PBS expects a section header at the first non-comment line; an empty file can trigger parse errors. @@ -2671,7 +2699,7 @@ tar -xzf /path/to/decrypted.tar.gz ./specific/file/path A: Yes: - **Extraction**: ProxSave preserves UID/GID, mode bits and timestamps (mtime/atime) for extracted entries. -- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). +- **Staged categories**: files are extracted under `/tmp/proxsave/restore-stage-*` and then applied to system paths using atomic replace; ProxSave explicitly applies mode bits (not left to `umask`) and preserves/derives ownership/group to match expected system defaults (important on PBS, where `proxmox-backup-proxy` runs as `backup`; ProxSave also repairs common `root:root` group regressions by inheriting the destination parent directory's group). 
On supported filesystems, staged writes also `fsync()` the temporary file and the destination directory to reduce the risk of incomplete writes after a crash/power loss. - **ctime**: Cannot be set (kernel-managed). --- diff --git a/docs/RESTORE_TECHNICAL.md b/docs/RESTORE_TECHNICAL.md index c7da91b..2bd5c02 100644 --- a/docs/RESTORE_TECHNICAL.md +++ b/docs/RESTORE_TECHNICAL.md @@ -869,6 +869,31 @@ func extractSelectiveArchive( --- +#### Phase 10: Staged Apply (PVE/PBS) + +After extraction, **staged categories** are applied from the staging directory under `/tmp/proxsave/restore-stage-*`. + +**PBS staged apply**: +- Selected interactively during restore on PBS hosts: **Merge (existing PBS)** vs **Clean 1:1 (fresh PBS install)**. +- ProxSave applies supported PBS categories via `proxmox-backup-manager`. + - **Merge**: create/update only (no deletions of existing objects not in the backup). + - **Clean 1:1**: attempts 1:1 reconciliation (may remove objects not present in the backup). +- If API apply is unavailable or fails, ProxSave may fall back to applying staged `*.cfg` files back to `/etc/proxmox-backup` (**Clean 1:1 only**). + +**Current PBS API coverage**: +- `pbs_host`: node + traffic control +- `datastore_pbs`: datastores + S3 endpoints +- `pbs_remotes`: remotes +- `pbs_jobs`: sync/verify/prune jobs +- `pbs_notifications`: notification endpoints/matchers + +Other PBS categories remain file-based (e.g. access control, tape, proxy/ACME/metricserver). + +**Key code paths**: +- `internal/orchestrator/pbs_staged_apply.go` (`maybeApplyPBSConfigsFromStage`) +- `internal/orchestrator/restore_notifications.go` (`maybeApplyNotificationsFromStage`, `pbs_notifications`) +- `internal/orchestrator/pbs_api_apply.go` / `internal/orchestrator/pbs_notifications_api_apply.go` (API apply engines) + ## Category System ### Category Definition Structure @@ -1072,6 +1097,8 @@ func shouldStopPBSServices(categories []Category) bool { } ``` +**API apply note**: When ProxSave applies PBS staged categories via API (`proxmox-backup-manager`), it may start PBS services again during the **staged apply** phase (even if services were stopped earlier for safe file extraction). + ### Error Handling Philosophy **Stop Phase**: **FAIL-FAST** diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 90a15d4..04c7e84 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -1021,7 +1021,10 @@ A: Use `--dry-run` mode: `./build/proxsave --dry-run --log-level debug` A: Update your configuration: `./build/proxsave --upgrade-config` **Q: Can I run backup while another backup is in progress?** -A: No. Use a lock file (`BACKUP_PATH/.backup.lock`) to prevent concurrent runs. +A: No. Proxsave uses a lock file (`BACKUP_PATH/.backup.lock`) to prevent concurrent runs. The lock stores `pid/host/time`; on the same host, proxsave checks PID liveness to avoid “stuck” locks after an interrupted run. + +**Q: Backup hangs during PVE datastore detection when a network storage is unreachable.** +A: Set `FS_IO_TIMEOUT` to cap how long proxsave waits for filesystem probes (stat/readdir/statfs), and `PVESH_TIMEOUT` to cap `pvesh` calls. This reduces the likelihood of indefinite hangs when a storage becomes unreachable mid-run. **Q: How do I recover from a failed backup?** A: Delete the incomplete backup file and re-run. The system automatically handles cleanup. 
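+
+As a companion to the `FS_IO_TIMEOUT` / `PVESH_TIMEOUT` answer above, here is a generic sketch of bounding a potentially blocking filesystem probe with `context.WithTimeout`. It is illustrative only and is not ProxSave's actual probe code; the path and helper name are made up.
+
+```go
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"time"
+)
+
+// statWithTimeout bounds a stat call that may hang on an unreachable network mount.
+// A timeout of 0 disables the bound, mirroring the FS_IO_TIMEOUT=0 semantics.
+func statWithTimeout(ctx context.Context, path string, timeout time.Duration) (os.FileInfo, error) {
+	if timeout > 0 {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, timeout)
+		defer cancel()
+	}
+
+	type result struct {
+		info os.FileInfo
+		err  error
+	}
+	// Buffered channel: if the stat never returns before the deadline, the goroutine
+	// can still send its result later and exit without blocking.
+	ch := make(chan result, 1)
+	go func() {
+		info, err := os.Stat(path) // may block indefinitely on a dead NFS/CIFS mount
+		ch <- result{info, err}
+	}()
+
+	select {
+	case r := <-ch:
+		return r.info, r.err
+	case <-ctx.Done():
+		return nil, fmt.Errorf("stat %s: %w", path, ctx.Err())
+	}
+}
+
+func main() {
+	// Example path is illustrative only.
+	if _, err := statWithTimeout(context.Background(), "/mnt/pve/remote-storage", 30*time.Second); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+	}
+}
+```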
diff --git a/internal/backup/collector.go b/internal/backup/collector.go index 8fb0d5a..925d298 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -5,10 +5,7 @@ import ( "context" "errors" "fmt" - "hash/fnv" "io" - "io/fs" - "math/rand" "os" "os/exec" "path/filepath" @@ -43,16 +40,14 @@ type FileSummary struct { // Collector handles backup data collection type Collector struct { - logger *logging.Logger - config *CollectorConfig - stats *CollectionStats - statsMu sync.Mutex - tempDir string - proxType types.ProxmoxType - dryRun bool - rootsMu sync.RWMutex - rootsCache map[string][]string - deps CollectorDeps + logger *logging.Logger + config *CollectorConfig + stats *CollectionStats + statsMu sync.Mutex + tempDir string + proxType types.ProxmoxType + dryRun bool + deps CollectorDeps // clusteredPVE records whether cluster mode was detected during PVE collection. clusteredPVE bool @@ -153,23 +148,27 @@ type CollectorConfig struct { PVEBackupIncludePattern string BackupCephConfig bool CephConfigPath string + PveshTimeoutSeconds int + FsIoTimeoutSeconds int // PBS-specific collection options - BackupDatastoreConfigs bool - BackupPBSS3Endpoints bool - BackupPBSNodeConfig bool - BackupPBSAcmeAccounts bool - BackupPBSAcmePlugins bool - BackupPBSMetricServers bool - BackupPBSTrafficControl bool - BackupUserConfigs bool - BackupRemoteConfigs bool - BackupSyncJobs bool - BackupVerificationJobs bool - BackupTapeConfigs bool - BackupPBSNetworkConfig bool - BackupPruneSchedules bool - BackupPxarFiles bool + BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool + BackupPBSNotifications bool + BackupPBSNotificationsPriv bool + BackupUserConfigs bool + BackupRemoteConfigs bool + BackupSyncJobs bool + BackupVerificationJobs bool + BackupTapeConfigs bool + BackupPBSNetworkConfig bool + BackupPruneSchedules bool + BackupPxarFiles bool // System collection options BackupNetworkConfigs bool @@ -193,12 +192,6 @@ type CollectorConfig struct { // PXAR scanning tuning PxarDatastoreConcurrency int - PxarIntraConcurrency int - PxarScanFanoutLevel int - PxarScanMaxRoots int - PxarStopOnCap bool - PxarEnumWorkers int - PxarEnumBudgetMs int PxarFileIncludePatterns []string PxarFileExcludePatterns []string @@ -251,7 +244,7 @@ func (c *CollectorConfig) Validate() error { c.BackupPVEBackupFiles || c.BackupCephConfig || c.BackupDatastoreConfigs || c.BackupPBSS3Endpoints || c.BackupPBSNodeConfig || c.BackupPBSAcmeAccounts || c.BackupPBSAcmePlugins || c.BackupPBSMetricServers || - c.BackupPBSTrafficControl || c.BackupUserConfigs || c.BackupRemoteConfigs || + c.BackupPBSTrafficControl || c.BackupPBSNotifications || c.BackupUserConfigs || c.BackupRemoteConfigs || c.BackupSyncJobs || c.BackupVerificationJobs || c.BackupTapeConfigs || c.BackupPBSNetworkConfig || c.BackupPruneSchedules || c.BackupPxarFiles || c.BackupNetworkConfigs || c.BackupAptSources || c.BackupCronJobs || @@ -267,24 +260,15 @@ func (c *CollectorConfig) Validate() error { if c.PxarDatastoreConcurrency <= 0 { c.PxarDatastoreConcurrency = 3 } - if c.PxarIntraConcurrency <= 0 { - c.PxarIntraConcurrency = 4 - } - if c.PxarScanFanoutLevel <= 0 { - c.PxarScanFanoutLevel = 1 - } - if c.PxarScanMaxRoots <= 0 { - c.PxarScanMaxRoots = 2048 - } - if c.PxarEnumWorkers <= 0 { - c.PxarEnumWorkers = 4 - } - if c.PxarEnumBudgetMs < 0 { - c.PxarEnumBudgetMs = 0 - } if c.MaxPVEBackupSizeBytes < 0 { 
return fmt.Errorf("MAX_PVE_BACKUP_SIZE must be >= 0") } + if c.PveshTimeoutSeconds < 0 { + c.PveshTimeoutSeconds = 15 + } + if c.FsIoTimeoutSeconds < 0 { + c.FsIoTimeoutSeconds = 30 + } if c.SystemRootPrefix != "" && !filepath.IsAbs(c.SystemRootPrefix) { return fmt.Errorf("system root prefix must be an absolute path") } @@ -300,14 +284,13 @@ func NewCollector(logger *logging.Logger, config *CollectorConfig, tempDir strin // NewCollectorWithDeps creates a collector with explicit dependency overrides (for testing). func NewCollectorWithDeps(logger *logging.Logger, config *CollectorConfig, tempDir string, proxType types.ProxmoxType, dryRun bool, deps CollectorDeps) *Collector { return &Collector{ - logger: logger, - config: config, - stats: &CollectionStats{}, - tempDir: tempDir, - proxType: proxType, - dryRun: dryRun, - rootsCache: make(map[string][]string), - deps: deps, + logger: logger, + config: config, + stats: &CollectionStats{}, + tempDir: tempDir, + proxType: proxType, + dryRun: dryRun, + deps: deps, } } @@ -329,23 +312,27 @@ func GetDefaultCollectorConfig() *CollectorConfig { PVEBackupIncludePattern: "", BackupCephConfig: true, CephConfigPath: "/etc/ceph", + PveshTimeoutSeconds: 15, + FsIoTimeoutSeconds: 30, // PBS-specific (all enabled by default) - BackupDatastoreConfigs: true, - BackupPBSS3Endpoints: true, - BackupPBSNodeConfig: true, - BackupPBSAcmeAccounts: true, - BackupPBSAcmePlugins: true, - BackupPBSMetricServers: true, - BackupPBSTrafficControl: true, - BackupUserConfigs: true, - BackupRemoteConfigs: true, - BackupSyncJobs: true, - BackupVerificationJobs: true, - BackupTapeConfigs: true, - BackupPBSNetworkConfig: true, - BackupPruneSchedules: true, - BackupPxarFiles: true, + BackupDatastoreConfigs: true, + BackupPBSS3Endpoints: true, + BackupPBSNodeConfig: true, + BackupPBSAcmeAccounts: true, + BackupPBSAcmePlugins: true, + BackupPBSMetricServers: true, + BackupPBSTrafficControl: true, + BackupPBSNotifications: true, + BackupPBSNotificationsPriv: true, + BackupUserConfigs: true, + BackupRemoteConfigs: true, + BackupSyncJobs: true, + BackupVerificationJobs: true, + BackupTapeConfigs: true, + BackupPBSNetworkConfig: true, + BackupPruneSchedules: true, + BackupPxarFiles: true, // System collection (all enabled by default) BackupNetworkConfigs: true, @@ -368,11 +355,6 @@ func GetDefaultCollectorConfig() *CollectorConfig { SystemRootPrefix: "", PxarDatastoreConcurrency: 3, - PxarIntraConcurrency: 4, - PxarScanFanoutLevel: 2, - PxarScanMaxRoots: 2048, - PxarEnumWorkers: 4, - PxarEnumBudgetMs: 0, PxarFileIncludePatterns: nil, PxarFileExcludePatterns: nil, @@ -918,7 +900,16 @@ func (c *Collector) safeCmdOutput(ctx context.Context, cmd, output, description } cmdString := strings.Join(cmdParts, " ") - out, err := c.depRunCommand(ctx, cmdParts[0], cmdParts[1:]...) + runCtx := ctx + var cancel context.CancelFunc + if cmdParts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { + runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) + } + if cancel != nil { + defer cancel() + } + + out, err := c.depRunCommand(runCtx, cmdParts[0], cmdParts[1:]...) if err != nil { if critical { c.incFilesFailed() @@ -1222,7 +1213,16 @@ func (c *Collector) captureCommandOutput(ctx context.Context, cmd, output, descr return nil, nil } - out, err := c.depRunCommand(ctx, parts[0], parts[1:]...) 
+ runCtx := ctx + var cancel context.CancelFunc + if parts[0] == "pvesh" && c.config != nil && c.config.PveshTimeoutSeconds > 0 { + runCtx, cancel = context.WithTimeout(ctx, time.Duration(c.config.PveshTimeoutSeconds)*time.Second) + } + if cancel != nil { + defer cancel() + } + + out, err := c.depRunCommand(runCtx, parts[0], parts[1:]...) if err != nil { cmdString := strings.Join(parts, " ") if critical { @@ -1320,807 +1320,6 @@ func (c *Collector) collectCommandOptional(ctx context.Context, cmd, output, des } } -func (c *Collector) sampleDirectories(ctx context.Context, root string, maxDepth, limit int) ([]string, error) { - results := make([]string, 0, limit) - if limit <= 0 { - return results, nil - } - - startDirs, err := c.computePxarWorkerRoots(ctx, root, "directories") - if err != nil { - return results, err - } - - if len(startDirs) == 0 { - c.logger.Debug("PXAR sampleDirectories: root=%s completed (selected=0 visited=0 duration=0s)", root) - return results, nil - } - - stopErr := errors.New("directory sample limit reached") - start := time.Now() - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - workerLimit := c.config.PxarIntraConcurrency - if workerLimit <= 0 { - workerLimit = 1 - } - - var ( - wg sync.WaitGroup - sem = make(chan struct{}, workerLimit) - resMu sync.Mutex - progressMu sync.Mutex - errMu sync.Mutex - visited int - lastLog = start - firstErr error - limitReached bool - ) - - appendResult := func(rel string) (bool, bool) { - resMu.Lock() - defer resMu.Unlock() - if limitReached { - return false, true - } - results = append(results, filepath.ToSlash(rel)) - if len(results) >= limit { - limitReached = true - cancel() - return true, true - } - return true, false - } - - logProgress := func() { - progressMu.Lock() - defer progressMu.Unlock() - visited++ - if time.Since(lastLog) > 2*time.Second { - resMu.Lock() - selected := len(results) - resMu.Unlock() - c.logger.Debug("PXAR sampleDirectories: root=%s visited=%d selected=%d", root, visited, selected) - lastLog = time.Now() - } - } - - recordError := func(err error) { - errMu.Lock() - if firstErr == nil { - firstErr = err - cancel() - } - errMu.Unlock() - } - - for _, startPath := range startDirs { - if err := ctx.Err(); err != nil { - break - } - wg.Add(1) - sem <- struct{}{} - go func(startDir string) { - defer func() { - <-sem - wg.Done() - }() - - walkErr := filepath.WalkDir(startDir, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - - if err := ctx.Err(); err != nil { - return err - } - - if path == root { - return nil - } - - if c.shouldExclude(path) { - if d.IsDir() { - return filepath.SkipDir - } - return nil - } - - rel, relErr := filepath.Rel(root, path) - if relErr != nil { - return relErr - } - - if d.IsDir() { - logProgress() - depth := strings.Count(rel, string(filepath.Separator)) - if depth >= maxDepth { - return filepath.SkipDir - } - if _, hitLimit := appendResult(rel); hitLimit { - return stopErr - } - } - return nil - }) - - if walkErr != nil && !errors.Is(walkErr, stopErr) && !errors.Is(walkErr, context.Canceled) { - recordError(walkErr) - } - }(startPath) - } - - wg.Wait() - - if firstErr != nil { - return results, firstErr - } - resMu.Lock() - limitWasReached := limitReached - resMu.Unlock() - - if err := ctx.Err(); err != nil && !errors.Is(err, context.Canceled) && !limitWasReached { - return results, err - } - - resMu.Lock() - selected := len(results) - resMu.Unlock() - - progressMu.Lock() - totalVisited := visited - 
progressMu.Unlock() - - c.logger.Debug("PXAR sampleDirectories: root=%s completed (selected=%d visited=%d duration=%s)", - root, selected, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil -} - -func (c *Collector) sampleFiles(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int) ([]FileSummary, error) { - results := make([]FileSummary, 0, limit) - if limit <= 0 { - return results, nil - } - - entries, err := os.ReadDir(root) - if err != nil { - return results, err - } - - stopErr := errors.New("file sample limit reached") - start := time.Now() - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - workerLimit := c.config.PxarIntraConcurrency - if workerLimit <= 0 { - workerLimit = 1 - } - - var ( - wg sync.WaitGroup - sem = make(chan struct{}, workerLimit) - resMu sync.Mutex - progressMu sync.Mutex - errMu sync.Mutex - visited int - matched int - lastLog = start - firstErr error - limitReached bool - ) - - appendResult := func(summary FileSummary) (bool, bool) { - resMu.Lock() - defer resMu.Unlock() - if limitReached { - return false, true - } - results = append(results, summary) - if len(results) >= limit { - limitReached = true - cancel() - return true, true - } - return true, false - } - - logProgress := func() { - progressMu.Lock() - defer progressMu.Unlock() - visited++ - if time.Since(lastLog) > 2*time.Second { - resMu.Lock() - selected := len(results) - resMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s visited=%d matched=%d selected=%d", root, visited, matched, selected) - lastLog = time.Now() - } - } - - incMatched := func() { - progressMu.Lock() - matched++ - progressMu.Unlock() - } - - recordError := func(err error) { - errMu.Lock() - if firstErr == nil { - firstErr = err - cancel() - } - errMu.Unlock() - } - - processFile := func(path string, info fs.FileInfo) error { - if c.shouldExclude(path) { - return nil - } - rel, err := filepath.Rel(root, path) - if err != nil { - return err - } - logProgress() - - if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, filepath.Base(path), rel) { - return nil - } - if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, filepath.Base(path), rel) { - return nil - } - incMatched() - - summary := FileSummary{ - RelativePath: filepath.ToSlash(rel), - SizeBytes: info.Size(), - SizeHuman: FormatBytes(info.Size()), - ModTime: info.ModTime(), - } - if _, hitLimit := appendResult(summary); hitLimit { - return stopErr - } - return nil - } - - limitTriggered := false - - for _, entry := range entries { - path := filepath.Join(root, entry.Name()) - if entry.IsDir() { - continue - } - - info, infoErr := entry.Info() - if infoErr != nil { - continue - } - if err := processFile(path, info); err != nil { - if errors.Is(err, stopErr) { - limitTriggered = true - break - } - return results, err - } - } - - if limitTriggered { - resMu.Lock() - selected := len(results) - resMu.Unlock() - progressMu.Lock() - totalVisited := visited - totalMatched := matched - progressMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil - } - - startDirs, err := c.computePxarWorkerRoots(ctx, root, "files") - if err != nil { - return results, err - } - - if len(startDirs) == 0 { - resMu.Lock() - selected := len(results) - resMu.Unlock() - progressMu.Lock() - totalVisited := visited - totalMatched 
:= matched - progressMu.Unlock() - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil - } - - for _, startPath := range startDirs { - if err := ctx.Err(); err != nil { - break - } - wg.Add(1) - sem <- struct{}{} - go func(startDir string) { - defer func() { - <-sem - wg.Done() - }() - - walkErr := filepath.WalkDir(startDir, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } - - if err := ctx.Err(); err != nil { - return err - } - - if d.IsDir() { - if c.shouldExclude(path) { - return filepath.SkipDir - } - rel, relErr := filepath.Rel(root, path) - if relErr != nil { - return relErr - } - depth := strings.Count(rel, string(filepath.Separator)) - if depth >= maxDepth { - return filepath.SkipDir - } - return nil - } - - info, infoErr := d.Info() - if infoErr != nil { - return nil - } - return processFile(path, info) - }) - - if walkErr != nil && !errors.Is(walkErr, stopErr) && !errors.Is(walkErr, context.Canceled) { - recordError(walkErr) - } - }(startPath) - } - - wg.Wait() - - if firstErr != nil { - return results, firstErr - } - - resMu.Lock() - limitWasReached := limitReached - selected := len(results) - resMu.Unlock() - - if err := ctx.Err(); err != nil && !errors.Is(err, context.Canceled) && !limitWasReached { - return results, err - } - - progressMu.Lock() - totalVisited := visited - totalMatched := matched - progressMu.Unlock() - - c.logger.Debug("PXAR sampleFiles: root=%s completed (selected=%d matched=%d visited=%d duration=%s)", - root, selected, totalMatched, totalVisited, time.Since(start).Truncate(time.Millisecond)) - return results, nil -} - -func (c *Collector) computePxarWorkerRoots(ctx context.Context, root, purpose string) ([]string, error) { - cacheKey := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsMu.RLock() - if cached, ok := c.rootsCache[cacheKey]; ok && len(cached) > 0 { - result := append([]string(nil), cached...) 
- c.rootsMu.RUnlock() - c.logger.Debug("PXAR worker roots cache hit (%s): root=%s count=%d", purpose, root, len(result)) - return result, nil - } - c.rootsMu.RUnlock() - - fanout := c.config.PxarScanFanoutLevel - if fanout < 1 { - fanout = 1 - } - maxRoots := c.config.PxarScanMaxRoots - if maxRoots <= 0 { - maxRoots = 2048 - } - enumWorkers := c.config.PxarEnumWorkers - if enumWorkers <= 0 { - enumWorkers = 1 - } - budgetMs := c.config.PxarEnumBudgetMs - - baseCtx, baseCancel := context.WithCancel(ctx) - defer baseCancel() - ctxFanout := baseCtx - if budgetMs > 0 { - ctxBudget, cancel := context.WithTimeout(baseCtx, time.Duration(budgetMs)*time.Millisecond) - ctxFanout = ctxBudget - defer cancel() - } - - start := time.Now() - c.logger.Debug("PXAR fanout enumeration (%s): root=%s fanout=%d max_roots=%d workers=%d budget_ms=%d", - purpose, root, fanout, maxRoots, enumWorkers, budgetMs) - - levels := make(map[int][]string, fanout) - selector := newPxarRootSelector(maxRoots) - var selectorMu sync.Mutex - queue := []string{root} - var foundAny atomic.Bool - stopOnCap := c.config.PxarStopOnCap - - const ( - pxarStopReasonNone int32 = iota - pxarStopReasonCap - pxarStopReasonBudget - ) - var stopReason atomic.Int32 - - var progressVisited atomic.Int64 - var progressScanned atomic.Int64 - var progressExcluded atomic.Int64 - var progressLeaves atomic.Int64 - var progressReadErr atomic.Int64 - var progressDepth atomic.Int64 - var progressCandidates atomic.Int64 - var progressCapped atomic.Bool - - var progressStop chan struct{} - if c.logger.GetLevel() >= types.LogLevelDebug { - progressStop = make(chan struct{}) - ticker := time.NewTicker(5 * time.Second) - go func() { - defer ticker.Stop() - for { - select { - case <-ticker.C: - c.logger.Debug("PXAR progress (%s): depth=%d visited=%d scanned=%d excluded=%d leaves=%d candidates=%d capped=%v elapsed=%s", - purpose, - progressDepth.Load(), - progressVisited.Load(), - progressScanned.Load(), - progressExcluded.Load(), - progressLeaves.Load(), - progressCandidates.Load(), - progressCapped.Load(), - time.Since(start).Truncate(time.Millisecond)) - case <-progressStop: - return - case <-ctxFanout.Done(): - return - } - } - }() - defer close(progressStop) - } - -fanoutLoop: - for depth := 0; depth < fanout; depth++ { - if len(queue) == 0 { - break - } - if err := ctxFanout.Err(); err != nil { - break - } - - progressDepth.Store(int64(depth + 1)) - next := make([]string, 0, len(queue)) - var nextMu sync.Mutex - - jobCh := make(chan string) - var wg sync.WaitGroup - - workerCount := enumWorkers - if workerCount > len(queue) { - workerCount = len(queue) - } - if workerCount < 1 { - workerCount = 1 - } - - shuffledBases := append([]string(nil), queue...) 
- shuffleStringsDeterministic(shuffledBases, deterministicSeed(root, purpose, fmt.Sprintf("depth-%d", depth))) - - for w := 0; w < workerCount; w++ { - wg.Add(1) - go func() { - defer wg.Done() - for basePath := range jobCh { - if err := ctxFanout.Err(); err != nil { - return - } - - progressVisited.Add(1) - entries, err := os.ReadDir(basePath) - if err != nil { - progressReadErr.Add(1) - continue - } - progressScanned.Add(int64(len(entries))) - shuffleDirEntriesDeterministic(entries, deterministicSeed(basePath, purpose, fmt.Sprintf("depth-%d", depth))) - - for _, entry := range entries { - if err := ctxFanout.Err(); err != nil { - return - } - - if !entry.IsDir() { - continue - } - child := filepath.Join(basePath, entry.Name()) - if c.shouldExclude(child) { - progressExcluded.Add(1) - continue - } - foundAny.Store(true) - - level := depth + 1 - if level < fanout { - nextMu.Lock() - levels[level] = append(levels[level], child) - next = append(next, child) - nextMu.Unlock() - continue - } - - selectorMu.Lock() - prevCapped := selector.capped - selector.consider(child) - progressLeaves.Add(1) - progressCandidates.Store(int64(selector.total)) - currentCapped := selector.capped - selectorMu.Unlock() - - if !prevCapped && currentCapped { - progressCapped.Store(true) - c.logger.Debug("PXAR progress (%s): candidate cap reached (limit=%d) at total=%d", - purpose, maxRoots, selector.total) - if stopOnCap { - if stopReason.CompareAndSwap(pxarStopReasonNone, pxarStopReasonCap) { - c.logger.Debug("PXAR early termination (%s): stop_on_cap=true limit=%d candidates=%d depth=%d elapsed=%s", - purpose, - maxRoots, - selector.total, - depth+1, - time.Since(start).Truncate(time.Millisecond)) - } - baseCancel() - return - } - } - } - } - }() - } - - for _, base := range shuffledBases { - select { - case <-ctxFanout.Done(): - break fanoutLoop - default: - jobCh <- base - } - } - close(jobCh) - wg.Wait() - - if err := ctxFanout.Err(); err != nil { - break - } - - c.logger.Debug("PXAR depth %d/%d done: bases=%d next_bases=%d leaves=%d excluded=%d readErrs=%d elapsed=%s", - depth+1, - fanout, - len(queue), - len(next), - progressLeaves.Load(), - progressExcluded.Load(), - progressReadErr.Load(), - time.Since(start).Truncate(time.Millisecond)) - queue = next - } - - if budgetMs > 0 && errors.Is(ctxFanout.Err(), context.DeadlineExceeded) { - stopReason.CompareAndSwap(pxarStopReasonNone, pxarStopReasonBudget) - c.logger.Debug("PXAR early termination (%s): enumeration budget exceeded (%dms)", purpose, budgetMs) - } - - if !foundAny.Load() { - return nil, nil - } - - roots := selector.results() - capped := selector.capped - totalCandidates := selector.total - if len(roots) == 0 { - for level := fanout - 1; level >= 1; level-- { - if dirs := levels[level]; len(dirs) > 0 { - c.logger.Debug("PXAR fallback to level=%d: dirs=%d (limit=%d)", level, len(dirs), maxRoots) - roots = uniquePaths(dirs) - totalCandidates = len(dirs) - if maxRoots > 0 && len(roots) > maxRoots { - c.logger.Debug("PXAR downsample: from=%d to=%d", len(roots), maxRoots) - roots = downsampleRoots(roots, maxRoots) - capped = true - } - break - } - } - } - - if len(roots) == 0 { - return nil, nil - } - - c.logger.Debug("PXAR worker roots (%s): root=%s fanout=%d count=%d candidates=%d capped=%v duration=%s", - purpose, - root, - fanout, - len(roots), - totalCandidates, - capped, - time.Since(start).Truncate(time.Millisecond)) - c.rootsMu.Lock() - c.rootsCache[cacheKey] = append([]string(nil), roots...) 
- c.rootsMu.Unlock() - return roots, nil -} - -func downsampleRoots(roots []string, limit int) []string { - if limit <= 0 || len(roots) <= limit { - return roots - } - step := len(roots) / limit - if step <= 1 { - return roots[:limit] - } - result := make([]string, 0, limit) - for i := 0; i < len(roots) && len(result) < limit; i += step { - result = append(result, roots[i]) - } - if len(result) < limit { - for i := len(roots) - 1; i >= 0 && len(result) < limit; i-- { - result = append(result, roots[i]) - } - } - if len(result) > limit { - result = result[:limit] - } - return result -} - -func shuffleStringsDeterministic(items []string, seed int64) { - if len(items) <= 1 { - return - } - r := rand.New(rand.NewSource(seed)) - for i := len(items) - 1; i > 0; i-- { - j := r.Intn(i + 1) - items[i], items[j] = items[j], items[i] - } -} - -func shuffleDirEntriesDeterministic(entries []fs.DirEntry, seed int64) { - if len(entries) <= 1 { - return - } - r := rand.New(rand.NewSource(seed)) - for i := len(entries) - 1; i > 0; i-- { - j := r.Intn(i + 1) - entries[i], entries[j] = entries[j], entries[i] - } -} - -func deterministicSeed(parts ...string) int64 { - hasher := fnv.New64a() - for _, p := range parts { - _, _ = hasher.Write([]byte(p)) - _, _ = hasher.Write([]byte{0}) - } - return int64(hasher.Sum64()) -} - -type pxarRootCandidate struct { - path string - weight uint32 -} - -type pxarRootSelector struct { - limit int - items []pxarRootCandidate - total int - capped bool - maxIdx int - maxWeight uint32 -} - -func newPxarRootSelector(limit int) *pxarRootSelector { - return &pxarRootSelector{ - limit: limit, - maxIdx: -1, - } -} - -func (s *pxarRootSelector) consider(path string) { - s.total++ - if s.limit <= 0 { - s.items = append(s.items, pxarRootCandidate{path: path}) - return - } - weight := hashPath(path) - if len(s.items) < s.limit { - s.items = append(s.items, pxarRootCandidate{path: path, weight: weight}) - if weight > s.maxWeight || s.maxIdx == -1 { - s.maxWeight = weight - s.maxIdx = len(s.items) - 1 - } - return - } - s.capped = true - if weight >= s.maxWeight { - return - } - s.items[s.maxIdx] = pxarRootCandidate{path: path, weight: weight} - s.recomputeMax() -} - -func (s *pxarRootSelector) recomputeMax() { - if len(s.items) == 0 { - s.maxIdx = -1 - s.maxWeight = 0 - return - } - maxIdx := 0 - maxWeight := s.items[0].weight - for i := 1; i < len(s.items); i++ { - if s.items[i].weight > maxWeight { - maxWeight = s.items[i].weight - maxIdx = i - } - } - s.maxIdx = maxIdx - s.maxWeight = maxWeight -} - -func (s *pxarRootSelector) results() []string { - if len(s.items) == 0 { - return nil - } - roots := make([]string, len(s.items)) - for i, item := range s.items { - roots[i] = item.path - } - return uniquePaths(roots) -} - -func hashPath(path string) uint32 { - h := fnv.New32a() - _, _ = h.Write([]byte(path)) - return h.Sum32() -} - -func uniquePaths(paths []string) []string { - if len(paths) == 0 { - return paths - } - seen := make(map[string]struct{}, len(paths)) - unique := make([]string, 0, len(paths)) - for _, path := range paths { - if _, ok := seen[path]; ok { - continue - } - seen[path] = struct{}{} - unique = append(unique, path) - } - return unique -} - func matchAnyPattern(patterns []string, name, relative string) bool { if len(patterns) == 0 { return true diff --git a/internal/backup/collector_collectall_test.go b/internal/backup/collector_collectall_test.go index aca9062..1bac94e 100644 --- a/internal/backup/collector_collectall_test.go +++ 
b/internal/backup/collector_collectall_test.go @@ -59,9 +59,9 @@ func TestCollectorCollectAll_PVEBranchWrapsCollectionError(t *testing.T) { collector := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) err := collector.CollectAll(context.Background()) if err == nil { - t.Fatalf("expected error, got %v", err) + t.Fatalf("expected error, got nil") } - if err == nil || !strings.Contains(err.Error(), "PVE collection failed:") { + if !strings.Contains(err.Error(), "PVE collection failed:") { t.Fatalf("expected wrapped PVE collection error, got %v", err) } } diff --git a/internal/backup/collector_config_extra_test.go b/internal/backup/collector_config_extra_test.go index ae15aa4..e13f6cd 100644 --- a/internal/backup/collector_config_extra_test.go +++ b/internal/backup/collector_config_extra_test.go @@ -28,7 +28,7 @@ func TestCollectorConfigValidateDefaultsAndErrors(t *testing.T) { if err := cfg.Validate(); err != nil { t.Fatalf("unexpected error for minimal valid config: %v", err) } - if cfg.PxarDatastoreConcurrency != 3 || cfg.PxarIntraConcurrency != 4 || cfg.PxarScanFanoutLevel != 1 || cfg.PxarScanMaxRoots != 2048 || cfg.PxarEnumWorkers != 4 { + if cfg.PxarDatastoreConcurrency != 3 { t.Fatalf("defaults not applied correctly: %+v", cfg) } } @@ -69,17 +69,6 @@ func TestCollectorConfigValidateEmptyExcludePattern(t *testing.T) { } } -func TestCollectorConfigValidateNormalizesNegativeBudget(t *testing.T) { - cfg := &CollectorConfig{BackupVMConfigs: true} - cfg.PxarEnumBudgetMs = -1 - if err := cfg.Validate(); err != nil { - t.Fatalf("unexpected error: %v", err) - } - if cfg.PxarEnumBudgetMs != 0 { - t.Fatalf("expected PxarEnumBudgetMs to be normalized to 0, got %d", cfg.PxarEnumBudgetMs) - } -} - func TestCollectorConfigValidateRequiresAbsoluteSystemRootPrefix(t *testing.T) { cfg := &CollectorConfig{BackupVMConfigs: true} cfg.SystemRootPrefix = "relative/path" diff --git a/internal/backup/collector_paths.go b/internal/backup/collector_paths.go index 7b39b64..83c6ec4 100644 --- a/internal/backup/collector_paths.go +++ b/internal/backup/collector_paths.go @@ -20,4 +20,3 @@ func (c *Collector) proxsaveCommandsDir(component string) string { func (c *Collector) proxsaveRuntimeDir(component string) string { return c.proxsaveInfoDir("runtime", component) } - diff --git a/internal/backup/collector_pbs.go b/internal/backup/collector_pbs.go index 6759b00..98212e9 100644 --- a/internal/backup/collector_pbs.go +++ b/internal/backup/collector_pbs.go @@ -214,6 +214,11 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro if !c.config.BackupPBSTrafficControl { extraExclude = append(extraExclude, "traffic-control.cfg") } + if !c.config.BackupPBSNotifications { + extraExclude = append(extraExclude, "notifications.cfg", "notifications-priv.cfg") + } else if !c.config.BackupPBSNotificationsPriv { + extraExclude = append(extraExclude, "notifications-priv.cfg") + } if !c.config.BackupUserConfigs { // User-related configs are intentionally excluded together. 
extraExclude = append(extraExclude, "user.cfg", "acl.cfg", "domains.cfg") @@ -281,6 +286,17 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro c.pbsManifest["traffic-control.cfg"] = c.collectPBSConfigFile(ctx, root, "traffic-control.cfg", "Traffic control rules", c.config.BackupPBSTrafficControl, "BACKUP_PBS_TRAFFIC_CONTROL") + // Notifications (targets/endpoints + matcher routing; secrets are stored in notifications-priv.cfg) + c.pbsManifest["notifications.cfg"] = c.collectPBSConfigFile(ctx, root, "notifications.cfg", + "Notifications configuration", c.config.BackupPBSNotifications, "BACKUP_PBS_NOTIFICATIONS") + privEnabled := c.config.BackupPBSNotifications && c.config.BackupPBSNotificationsPriv + privDisableHint := "BACKUP_PBS_NOTIFICATIONS_PRIV" + if !c.config.BackupPBSNotifications { + privDisableHint = "BACKUP_PBS_NOTIFICATIONS" + } + c.pbsManifest["notifications-priv.cfg"] = c.collectPBSConfigFile(ctx, root, "notifications-priv.cfg", + "Notifications secrets", privEnabled, privDisableHint) + // User configuration c.pbsManifest["user.cfg"] = c.collectPBSConfigFile(ctx, root, "user.cfg", "User configuration", c.config.BackupUserConfigs, "BACKUP_USER_CONFIGS") @@ -381,7 +397,10 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // Notifications (targets, matchers, endpoints) - c.collectPBSNotificationSnapshots(ctx, commandsDir) + if c.config.BackupPBSNotifications { + c.collectPBSNotificationSnapshots(ctx, commandsDir) + c.writePBSNotificationSummary(commandsDir) + } // User list if c.config.BackupUserConfigs { diff --git a/internal/backup/collector_pbs_commands_coverage_test.go b/internal/backup/collector_pbs_commands_coverage_test.go index 9cac09a..b96c44f 100644 --- a/internal/backup/collector_pbs_commands_coverage_test.go +++ b/internal/backup/collector_pbs_commands_coverage_test.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" "testing" + "time" "github.com/tis24dev/proxsave/internal/pbs" "github.com/tis24dev/proxsave/internal/types" @@ -56,6 +57,7 @@ func TestCollectPBSCommandsWritesExpectedOutputs(t *testing.T) { "notification_endpoints_sendmail.json", "notification_endpoints_gotify.json", "notification_endpoints_webhook.json", + "notifications_summary.json", "user_list.json", "realms_ldap.json", "realms_ad.json", @@ -117,7 +119,6 @@ func TestCollectPBSPxarMetadataProcessesMultipleDatastores(t *testing.T) { tmp := t.TempDir() cfg := GetDefaultCollectorConfig() cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 collector := NewCollector(newTestLogger(), cfg, tmp, types.ProxmoxBS, false) @@ -205,7 +206,7 @@ func TestCollectPBSPxarMetadataReturnsErrorWhenTempVarIsFile(t *testing.T) { func TestCollectDatastoreConfigsCreatesConfigAndNamespaceFiles(t *testing.T) { origList := listNamespacesFunc t.Cleanup(func() { listNamespacesFunc = origList }) - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: "root", Path: "/"}}, false, nil } @@ -313,14 +314,13 @@ func TestCollectPBSConfigsEndToEndWithStubs(t *testing.T) { origList := listNamespacesFunc t.Cleanup(func() { listNamespacesFunc = origList }) - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: "root", Path: "/"}}, false, nil } 
cfg := GetDefaultCollectorConfig() cfg.PBSConfigPath = pbsRoot cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 deps := CollectorDeps{ LookPath: func(name string) (string, error) { return "/bin/" + name, nil }, @@ -400,7 +400,6 @@ func TestCollectPBSPxarMetadataStopsOnFirstDatastoreError(t *testing.T) { tmp := t.TempDir() cfg := GetDefaultCollectorConfig() cfg.PxarDatastoreConcurrency = 2 - cfg.PxarIntraConcurrency = 1 collector := NewCollector(newTestLogger(), cfg, tmp, types.ProxmoxBS, false) diff --git a/internal/backup/collector_pbs_datastore.go b/internal/backup/collector_pbs_datastore.go index 1b3d02c..0b5c2ba 100644 --- a/internal/backup/collector_pbs_datastore.go +++ b/internal/backup/collector_pbs_datastore.go @@ -13,6 +13,7 @@ import ( "time" "github.com/tis24dev/proxsave/internal/pbs" + "github.com/tis24dev/proxsave/internal/safefs" ) type pbsDatastore struct { @@ -45,7 +46,7 @@ func (c *Collector) collectDatastoreConfigs(ctx context.Context, datastores []pb false) // Get namespace list using CLI/Filesystem fallback - if err := c.collectDatastoreNamespaces(ds, datastoreDir); err != nil { + if err := c.collectDatastoreNamespaces(ctx, ds, datastoreDir); err != nil { c.logger.Debug("Failed to collect namespaces for datastore %s: %v", ds.Name, err) } } @@ -56,7 +57,7 @@ func (c *Collector) collectDatastoreConfigs(ctx context.Context, datastores []pb // collectDatastoreNamespaces collects namespace information for a datastore // using CLI first, then filesystem fallback. -func (c *Collector) collectDatastoreNamespaces(ds pbsDatastore, datastoreDir string) error { +func (c *Collector) collectDatastoreNamespaces(ctx context.Context, ds pbsDatastore, datastoreDir string) error { c.logger.Debug("Collecting namespaces for datastore %s (path: %s)", ds.Name, ds.Path) // Write location is deterministic; if excluded, skip the whole operation. 
outputPath := filepath.Join(datastoreDir, fmt.Sprintf("%s_namespaces.json", ds.Name)) @@ -65,7 +66,12 @@ func (c *Collector) collectDatastoreNamespaces(ds pbsDatastore, datastoreDir str return nil } - namespaces, fromFallback, err := listNamespacesFunc(ds.Name, ds.Path) + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + + namespaces, fromFallback, err := listNamespacesFunc(ctx, ds.Name, ds.Path, ioTimeout) if err != nil { return err } @@ -101,15 +107,11 @@ func (c *Collector) collectPBSPxarMetadata(ctx context.Context, datastores []pbs if dsWorkers <= 0 { dsWorkers = 1 } - intraWorkers := c.config.PxarIntraConcurrency - if intraWorkers <= 0 { - intraWorkers = 1 - } mode := "sequential" if dsWorkers > 1 { mode = fmt.Sprintf("parallel (%d workers)", dsWorkers) } - c.logger.Debug("PXAR metadata concurrency: datastores=%s, per-datastore workers=%d", mode, intraWorkers) + c.logger.Debug("PXAR metadata concurrency: datastores=%s", mode) pxarRoot := c.proxsaveInfoDir("pbs", "pxar") metaRoot := filepath.Join(pxarRoot, "metadata") @@ -190,9 +192,22 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return nil } - stat, err := os.Stat(ds.Path) - if err != nil || !stat.IsDir() { - c.logger.Debug("Skipping PXAR metadata for datastore %s (path not accessible: %s)", ds.Name, ds.Path) + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + + stat, err := safefs.Stat(ctx, ds.Path, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): filesystem probe timed out (%v)", ds.Name, ds.Path, err) + return nil + } + c.logger.Debug("Skipping PXAR metadata for datastore %s (path not accessible: %s): %v", ds.Name, ds.Path, err) + return nil + } + if !stat.IsDir() { + c.logger.Debug("Skipping PXAR metadata for datastore %s (path not a directory: %s)", ds.Name, ds.Path) return nil } @@ -229,7 +244,10 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m ScannedAt: time.Now(), } - if dirs, err := c.sampleDirectories(ctx, ds.Path, 2, 30); err == nil && len(dirs) > 0 { + if dirs, err := c.sampleDirectoriesBounded(ctx, ds.Path, 2, 30, ioTimeout); errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): directory sampling timed out (%v)", ds.Name, ds.Path, err) + return nil + } else if err == nil && len(dirs) > 0 { meta.SampleDirectories = dirs c.logger.Debug("PXAR: datastore %s -> selected %d sample directories", ds.Name, len(dirs)) } else if err != nil { @@ -241,7 +259,10 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m includePatterns = []string{"*.pxar", "*.pxar.*", "catalog.pxar", "catalog.pxar.*"} } excludePatterns := c.config.PxarFileExcludePatterns - if files, err := c.sampleFiles(ctx, ds.Path, includePatterns, excludePatterns, 8, 200); err == nil && len(files) > 0 { + if files, err := c.sampleFilesBounded(ctx, ds.Path, includePatterns, excludePatterns, 8, 200, ioTimeout); errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): file sampling timed out (%v)", ds.Name, ds.Path, err) + return nil + } else if err == nil && len(files) > 0 { meta.SamplePxarFiles = files c.logger.Debug("PXAR: datastore %s -> selected 
%d sample pxar files", ds.Name, len(files)) } else if err != nil { @@ -257,15 +278,27 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return err } - if err := c.writePxarSubdirReport(filepath.Join(dsDir, fmt.Sprintf("%s_subdirs.txt", ds.Name)), ds); err != nil { + if err := c.writePxarSubdirReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_subdirs.txt", ds.Name)), ds, ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): subdir report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } - if err := c.writePxarListReport(filepath.Join(dsDir, fmt.Sprintf("%s_vm_pxar_list.txt", ds.Name)), ds, "vm"); err != nil { + if err := c.writePxarListReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_vm_pxar_list.txt", ds.Name)), ds, "vm", ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): VM list report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } - if err := c.writePxarListReport(filepath.Join(dsDir, fmt.Sprintf("%s_ct_pxar_list.txt", ds.Name)), ds, "ct"); err != nil { + if err := c.writePxarListReport(ctx, filepath.Join(dsDir, fmt.Sprintf("%s_ct_pxar_list.txt", ds.Name)), ds, "ct", ioTimeout); err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping PXAR metadata for datastore %s (path=%s): CT list report timed out (%v)", ds.Name, ds.Path, err) + return nil + } return err } @@ -273,14 +306,17 @@ func (c *Collector) processPxarDatastore(ctx context.Context, ds pbsDatastore, m return nil } -func (c *Collector) writePxarSubdirReport(target string, ds pbsDatastore) error { +func (c *Collector) writePxarSubdirReport(ctx context.Context, target string, ds pbsDatastore, ioTimeout time.Duration) error { c.logger.Debug("Writing PXAR subdirectory report for datastore %s", ds.Name) var builder strings.Builder builder.WriteString(fmt.Sprintf("# Datastore subdirectories in %s generated on %s\n", ds.Path, time.Now().Format(time.RFC1123))) builder.WriteString(fmt.Sprintf("# Datastore: %s\n", ds.Name)) - entries, err := os.ReadDir(ds.Path) + entries, err := safefs.ReadDir(ctx, ds.Path, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } builder.WriteString(fmt.Sprintf("# Unable to read datastore path: %v\n", err)) return c.writeReportFile(target, []byte(builder.String())) } @@ -305,7 +341,7 @@ func (c *Collector) writePxarSubdirReport(target string, ds pbsDatastore) error return nil } -func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir string) error { +func (c *Collector) writePxarListReport(ctx context.Context, target string, ds pbsDatastore, subDir string, ioTimeout time.Duration) error { c.logger.Debug("Writing PXAR file list for datastore %s subdir %s", ds.Name, subDir) basePath := filepath.Join(ds.Path, subDir) @@ -314,8 +350,11 @@ func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir s builder.WriteString(fmt.Sprintf("# Datastore: %s, Subdirectory: %s\n", ds.Name, subDir)) builder.WriteString("# Format: permissions size date name\n") - entries, err := os.ReadDir(basePath) + entries, err := safefs.ReadDir(ctx, basePath, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } builder.WriteString(fmt.Sprintf("# Unable to read directory: %v\n", err)) if writeErr := c.writeReportFile(target, []byte(builder.String())); writeErr != nil { 
return writeErr @@ -339,8 +378,13 @@ func (c *Collector) writePxarListReport(target string, ds pbsDatastore, subDir s if !strings.HasSuffix(entry.Name(), ".pxar") { continue } - info, err := entry.Info() + + fullPath := filepath.Join(basePath, entry.Name()) + info, err := safefs.Stat(ctx, fullPath, ioTimeout) if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } continue } files = append(files, infoEntry{ diff --git a/internal/backup/collector_pbs_datastore_inventory.go b/internal/backup/collector_pbs_datastore_inventory.go index c549206..72bbb02 100644 --- a/internal/backup/collector_pbs_datastore_inventory.go +++ b/internal/backup/collector_pbs_datastore_inventory.go @@ -653,11 +653,11 @@ func extractFstabReferencedFiles(content string) []string { } keys := map[string]struct{}{ - "credentials": {}, - "cred": {}, - "passwd": {}, - "passfile": {}, - "keyfile": {}, + "credentials": {}, + "cred": {}, + "passwd": {}, + "passfile": {}, + "keyfile": {}, "identityfile": {}, } diff --git a/internal/backup/collector_pbs_extra_test.go b/internal/backup/collector_pbs_extra_test.go index d80dae4..bdb822f 100644 --- a/internal/backup/collector_pbs_extra_test.go +++ b/internal/backup/collector_pbs_extra_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/tis24dev/proxsave/internal/pbs" "github.com/tis24dev/proxsave/internal/types" @@ -95,7 +96,7 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { } origList := listNamespacesFunc - listNamespacesFunc = func(name, path string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(_ context.Context, name, path string, _ time.Duration) ([]pbs.Namespace, bool, error) { if name != ds.Name || path != ds.Path { t.Fatalf("unexpected args %s %s", name, path) } @@ -103,7 +104,7 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { } t.Cleanup(func() { listNamespacesFunc = origList }) - if err := c.collectDatastoreNamespaces(ds, targetDir); err != nil { + if err := c.collectDatastoreNamespaces(context.Background(), ds, targetDir); err != nil { t.Fatalf("collectDatastoreNamespaces error: %v", err) } nsPath := filepath.Join(targetDir, "store_namespaces.json") @@ -111,10 +112,10 @@ func TestCollectDatastoreNamespacesSuccessAndError(t *testing.T) { t.Fatalf("expected namespaces file, got %v", err) } - listNamespacesFunc = func(string, string) ([]pbs.Namespace, bool, error) { + listNamespacesFunc = func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return nil, false, errors.New("fail") } - if err := c.collectDatastoreNamespaces(ds, targetDir); err == nil { + if err := c.collectDatastoreNamespaces(context.Background(), ds, targetDir); err == nil { t.Fatalf("expected error when namespace listing fails") } } diff --git a/internal/backup/collector_pbs_notifications_summary.go b/internal/backup/collector_pbs_notifications_summary.go new file mode 100644 index 0000000..244cb8d --- /dev/null +++ b/internal/backup/collector_pbs_notifications_summary.go @@ -0,0 +1,220 @@ +package backup + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +type pbsNotificationSnapshotSummary struct { + Present bool `json:"present"` + Bytes int64 `json:"bytes,omitempty"` + Total int `json:"total,omitempty"` + BuiltIn int `json:"built_in,omitempty"` + Custom int `json:"custom,omitempty"` + Names []string `json:"names,omitempty"` + Error string `json:"error,omitempty"` +} + +type pbsNotificationsConfigFilesSummary struct { + 
NotificationsCfg     ManifestEntry `json:"notifications_cfg"`
+    NotificationsPrivCfg ManifestEntry `json:"notifications_priv_cfg"`
+}
+
+type pbsNotificationsSummary struct {
+    GeneratedAt time.Time `json:"generated_at"`
+    Enabled     bool      `json:"enabled"`
+    PrivEnabled bool      `json:"priv_enabled"`
+
+    ConfigFiles *pbsNotificationsConfigFilesSummary `json:"config_files,omitempty"`
+
+    Targets   pbsNotificationSnapshotSummary            `json:"targets"`
+    Matchers  pbsNotificationSnapshotSummary            `json:"matchers"`
+    Endpoints map[string]pbsNotificationSnapshotSummary `json:"endpoints"`
+
+    Notes    []string `json:"notes,omitempty"`
+    Warnings []string `json:"warnings,omitempty"`
+}
+
+func (c *Collector) writePBSNotificationSummary(commandsDir string) {
+    if c == nil {
+        return
+    }
+
+    summary := pbsNotificationsSummary{
+        GeneratedAt: time.Now().UTC(),
+        Enabled:     c.config != nil && c.config.BackupPBSNotifications,
+        PrivEnabled: c.config != nil && c.config.BackupPBSNotifications && c.config.BackupPBSNotificationsPriv,
+        Endpoints:   make(map[string]pbsNotificationSnapshotSummary),
+    }
+
+    if c.pbsManifest != nil {
+        summary.ConfigFiles = &pbsNotificationsConfigFilesSummary{
+            NotificationsCfg:     c.pbsManifest["notifications.cfg"],
+            NotificationsPrivCfg: c.pbsManifest["notifications-priv.cfg"],
+        }
+    }
+
+    summary.Targets = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, "notification_targets.json"))
+    summary.Matchers = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, "notification_matchers.json"))
+    for _, typ := range []string{"smtp", "sendmail", "gotify", "webhook"} {
+        summary.Endpoints[typ] = summarizePBSNotificationSnapshot(filepath.Join(commandsDir, fmt.Sprintf("notification_endpoints_%s.json", typ)))
+    }
+
+    if summary.ConfigFiles != nil {
+        cfg := summary.ConfigFiles.NotificationsCfg
+        priv := summary.ConfigFiles.NotificationsPrivCfg
+
+        if cfg.Status != StatusCollected && cfg.Status != StatusDisabled {
+            if summary.Targets.Total > 0 || sumEndpointTotals(summary.Endpoints) > 0 {
+                summary.Warnings = append(summary.Warnings, "Notification objects detected in snapshots, but notifications.cfg was not collected (check BACKUP_PBS_NOTIFICATIONS and exclusions).")
+            }
+        }
+
+        if priv.Status == StatusDisabled {
+            summary.Notes = append(summary.Notes, "notifications-priv.cfg backup is disabled (BACKUP_PBS_NOTIFICATIONS_PRIV=false); endpoint credentials/secrets will not be included.")
+        } else if priv.Status != StatusCollected {
+            if summary.Targets.Custom > 0 || sumEndpointCustom(summary.Endpoints) > 0 {
+                summary.Warnings = append(summary.Warnings, "Custom notification endpoints/targets detected, but notifications-priv.cfg was not collected; restore may require re-entering secrets/credentials.")
+            }
+        }
+    }
+
+    // Surface important mismatches in the console log too.
+    if c.logger != nil {
+        c.logger.Info("PBS notifications snapshot summary: targets=%d matchers=%d endpoints=%d",
+            summary.Targets.Total,
+            summary.Matchers.Total,
+            sumEndpointTotals(summary.Endpoints),
+        )
+        for _, note := range summary.Notes {
+            c.logger.Info("PBS notifications: %s", note)
+        }
+        for _, warning := range summary.Warnings {
+            c.logger.Warning("PBS notifications: %s", warning)
+        }
+    }
+
+    out, err := json.MarshalIndent(summary, "", " ")
+    if err != nil {
+        c.logger.Debug("PBS notifications summary skipped: marshal error: %v", err)
+        return
+    }
+
+    if err := c.writeReportFile(filepath.Join(commandsDir, "notifications_summary.json"), out); err != nil {
+        c.logger.Debug("PBS notifications summary write failed: %v", err)
+    }
+}
+
+func sumEndpointTotals(endpoints map[string]pbsNotificationSnapshotSummary) int {
+    total := 0
+    for _, s := range endpoints {
+        total += s.Total
+    }
+    return total
+}
+
+func sumEndpointCustom(endpoints map[string]pbsNotificationSnapshotSummary) int {
+    total := 0
+    for _, s := range endpoints {
+        total += s.Custom
+    }
+    return total
+}
+
+func summarizePBSNotificationSnapshot(path string) pbsNotificationSnapshotSummary {
+    raw, err := os.ReadFile(path)
+    if err != nil {
+        if os.IsNotExist(err) {
+            return pbsNotificationSnapshotSummary{Present: false}
+        }
+        return pbsNotificationSnapshotSummary{
+            Present: false,
+            Error:   err.Error(),
+        }
+    }
+
+    summary := pbsNotificationSnapshotSummary{
+        Present: true,
+        Bytes:   int64(len(raw)),
+    }
+
+    trimmed := strings.TrimSpace(string(raw))
+    if trimmed == "" {
+        return summary
+    }
+
+    var payload any
+    if err := json.Unmarshal([]byte(trimmed), &payload); err != nil {
+        summary.Error = fmt.Sprintf("invalid json: %v", err)
+        return summary
+    }
+
+    // Unwrap proxmox-backup-manager JSON envelope (common shape: {"data":[...], ...}).
+ if m, ok := payload.(map[string]any); ok { + if data, ok := m["data"]; ok { + payload = data + } + } + + items, ok := payload.([]any) + if !ok { + summary.Error = "unexpected json shape (expected list)" + return summary + } + + summary.Total = len(items) + + names := make([]string, 0, len(items)) + for _, item := range items { + entry, ok := item.(map[string]any) + if !ok { + continue + } + + name := firstString(entry, "name", "id", "target", "matcher") + if name != "" { + names = append(names, name) + } + + origin := strings.ToLower(strings.TrimSpace(firstString(entry, "origin"))) + switch { + case strings.Contains(origin, "built"): + summary.BuiltIn++ + case strings.Contains(origin, "custom"): + summary.Custom++ + } + } + + sort.Strings(names) + if len(names) > 100 { + names = names[:100] + } + if len(names) > 0 { + summary.Names = names + } + + return summary +} + +func firstString(entry map[string]any, keys ...string) string { + for _, key := range keys { + v, ok := entry[key] + if !ok { + continue + } + s, ok := v.(string) + if !ok { + continue + } + s = strings.TrimSpace(s) + if s != "" { + return s + } + } + return "" +} diff --git a/internal/backup/collector_pbs_test.go b/internal/backup/collector_pbs_test.go index 61180ee..e44fddf 100644 --- a/internal/backup/collector_pbs_test.go +++ b/internal/backup/collector_pbs_test.go @@ -253,7 +253,7 @@ func TestHasTapeSupportHasDrives(t *testing.T) { } func TestCollectDatastoreNamespacesSuccess(t *testing.T) { - stubListNamespaces(t, func(name, path string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(_ context.Context, name, path string, _ time.Duration) ([]pbs.Namespace, bool, error) { if name != "store1" || path != "/fake" { t.Fatalf("unexpected datastore %s %s", name, path) } @@ -270,7 +270,7 @@ func TestCollectDatastoreNamespacesSuccess(t *testing.T) { } ds := pbsDatastore{Name: "store1", Path: "/fake"} - if err := collector.collectDatastoreNamespaces(ds, dsDir); err != nil { + if err := collector.collectDatastoreNamespaces(context.Background(), ds, dsDir); err != nil { t.Fatalf("collectDatastoreNamespaces failed: %v", err) } @@ -289,7 +289,7 @@ func TestCollectDatastoreNamespacesSuccess(t *testing.T) { } func TestCollectDatastoreNamespacesError(t *testing.T) { - stubListNamespaces(t, func(string, string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return nil, false, fmt.Errorf("boom") }) @@ -299,14 +299,14 @@ func TestCollectDatastoreNamespacesError(t *testing.T) { t.Fatalf("failed to create datastore dir: %v", err) } - err := collector.collectDatastoreNamespaces(pbsDatastore{Name: "store1"}, dsDir) + err := collector.collectDatastoreNamespaces(context.Background(), pbsDatastore{Name: "store1"}, dsDir) if err == nil || !strings.Contains(err.Error(), "boom") { t.Fatalf("expected error from list namespaces, got %v", err) } } func TestCollectDatastoreConfigsDryRun(t *testing.T) { - stubListNamespaces(t, func(string, string) ([]pbs.Namespace, bool, error) { + stubListNamespaces(t, func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error) { return []pbs.Namespace{{Ns: ""}}, false, nil }) @@ -395,7 +395,7 @@ func TestCollectUserConfigsMissingUserList(t *testing.T) { } } -func stubListNamespaces(t *testing.T, fn func(string, string) ([]pbs.Namespace, bool, error)) { +func stubListNamespaces(t *testing.T, fn func(context.Context, string, string, time.Duration) ([]pbs.Namespace, bool, error)) { 
t.Helper() orig := listNamespacesFunc listNamespacesFunc = fn diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 756356a..ed0a8a6 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -11,8 +11,9 @@ import ( "path/filepath" "sort" "strings" - "syscall" "time" + + "github.com/tis24dev/proxsave/internal/safefs" ) type pveStorageEntry struct { @@ -20,6 +21,13 @@ type pveStorageEntry struct { Path string Type string Content string + + // Runtime status fields from `pvesh get /nodes//storage`. + // These are optional and may be nil/empty depending on the data source + // (e.g. storage.cfg parsing has no runtime status). + Active *bool + Enabled *bool + Status string } type pveRuntimeInfo struct { @@ -659,11 +667,6 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv if nodeName == "" { nodeName = hostname } - c.safeCmdOutput(ctx, - fmt.Sprintf("pvesh get /nodes/%s/storage --output-format=json", nodeName), - filepath.Join(commandsDir, "storage_status.json"), - "Storage status", - false) // Disk list c.safeCmdOutput(ctx, @@ -717,12 +720,46 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv } func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { + parseOptionalBool := func(value any) *bool { + if value == nil { + return nil + } + switch v := value.(type) { + case bool: + b := v + return &b + case float64: + b := v != 0 + return &b + case string: + s := strings.ToLower(strings.TrimSpace(v)) + if s == "" { + return nil + } + switch s { + case "1", "true", "yes", "on": + b := true + return &b + case "0", "false", "no", "off": + b := false + return &b + default: + return nil + } + default: + return nil + } + } + var raw []struct { Storage string `json:"storage"` Name string `json:"name"` Path string `json:"path"` Type string `json:"type"` Content string `json:"content"` + Active any `json:"active"` + Enabled any `json:"enabled"` + Status string `json:"status"` } if err := json.Unmarshal(data, &raw); err != nil { return nil, err @@ -746,6 +783,9 @@ func parseNodeStorageList(data []byte) ([]pveStorageEntry, error) { Path: strings.TrimSpace(item.Path), Type: strings.TrimSpace(item.Type), Content: strings.TrimSpace(item.Content), + Active: parseOptionalBool(item.Active), + Enabled: parseOptionalBool(item.Enabled), + Status: strings.TrimSpace(item.Status), }) } return entries, nil @@ -991,6 +1031,48 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv summary.WriteString(time.Now().Format(time.RFC3339)) summary.WriteString("\n# Format: NAME|PATH|TYPE|CONTENT\n\n") + ioTimeout := time.Duration(0) + if c.config != nil && c.config.FsIoTimeoutSeconds > 0 { + ioTimeout = time.Duration(c.config.FsIoTimeoutSeconds) * time.Second + } + + formatRuntime := func(storage pveStorageEntry) string { + parts := make([]string, 0, 3) + if storage.Active != nil { + parts = append(parts, fmt.Sprintf("active=%v", *storage.Active)) + } + if storage.Enabled != nil { + parts = append(parts, fmt.Sprintf("enabled=%v", *storage.Enabled)) + } + if status := strings.TrimSpace(storage.Status); status != "" { + parts = append(parts, "status="+status) + } + if len(parts) == 0 { + return "" + } + return " (" + strings.Join(parts, " ") + ")" + } + + unavailableReason := func(storage pveStorageEntry) string { + if storage.Enabled != nil && !*storage.Enabled { + return "enabled=false" + } + if storage.Active != nil && !*storage.Active { + return "active=false" + } + 
if status := strings.ToLower(strings.TrimSpace(storage.Status)); status != "" { + switch status { + case "available", "active", "ok": + // Known-good states + case "unknown", "inactive", "disabled", "unavailable", "error": + return "status=" + status + default: + // Unknown status: do not skip based on this field alone. + } + } + return "" + } + processed := 0 for _, storage := range storages { if storage.Path == "" { @@ -999,8 +1081,24 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv if err := ctx.Err(); err != nil { return err } - if stat, err := os.Stat(storage.Path); err != nil || !stat.IsDir() { - c.logger.Debug("Skipping datastore %s (path not accessible: %s)", storage.Name, storage.Path) + + if reason := unavailableReason(storage); reason != "" { + c.logger.Warning("Skipping datastore %s (path=%s)%s: not available (%s)", storage.Name, storage.Path, formatRuntime(storage), reason) + continue + } + + c.logger.Info("Probing datastore %s (path=%s)%s", storage.Name, storage.Path, formatRuntime(storage)) + stat, err := safefs.Stat(ctx, storage.Path, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: filesystem probe timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), err) + } else { + c.logger.Debug("Skipping datastore %s (path not accessible: %s): %v", storage.Name, storage.Path, err) + } + continue + } + if !stat.IsDir() { + c.logger.Debug("Skipping datastore %s (path not a directory: %s)", storage.Name, storage.Path) continue } @@ -1034,7 +1132,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv ScannedAt: time.Now(), } - dirSamples, dirSampleErr := c.sampleDirectories(ctx, storage.Path, 2, 20) + dirSamples, dirSampleErr := c.sampleDirectoriesBounded(ctx, storage.Path, 2, 20, ioTimeout) + if errors.Is(dirSampleErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: directory sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), dirSampleErr) + continue + } if dirSampleErr != nil { c.logger.Debug("Directory sample for datastore %s failed: %v", storage.Name, dirSampleErr) } @@ -1042,7 +1144,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv meta.SampleDirectories = dirSamples } - diskUsageText, diskUsageErr := c.describeDiskUsage(storage.Path) + diskUsageText, diskUsageErr := c.describeDiskUsage(ctx, storage.Path, ioTimeout) + if errors.Is(diskUsageErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: disk usage probe timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), diskUsageErr) + continue + } if diskUsageErr != nil { c.logger.Debug("Disk usage summary for %s failed: %v", storage.Name, diskUsageErr) } else { @@ -1059,7 +1165,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv } excludePatterns := c.config.PxarFileExcludePatterns - fileSummaries, sampleFileErr := c.sampleFiles(ctx, storage.Path, includePatterns, excludePatterns, 3, 100) + fileSummaries, sampleFileErr := c.sampleFilesBounded(ctx, storage.Path, includePatterns, excludePatterns, 3, 100, ioTimeout) + if errors.Is(sampleFileErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: file sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), sampleFileErr) + continue + } if sampleFileErr != nil { c.logger.Debug("Backup file sample for %s failed: %v", 
storage.Name, sampleFileErr) } else if len(fileSummaries) > 0 { @@ -1075,7 +1185,11 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv return err } - fileSampleLines, fileSampleErr := c.sampleMetadataFileStats(ctx, storage.Path, 3, 10) + fileSampleLines, fileSampleErr := c.sampleMetadataFileStats(ctx, storage.Path, 3, 10, ioTimeout) + if errors.Is(fileSampleErr, safefs.ErrTimeout) { + c.logger.Warning("Skipping datastore %s (path=%s)%s: metadata sampling timed out (%v)", storage.Name, storage.Path, formatRuntime(storage), fileSampleErr) + continue + } if fileSampleErr != nil { c.logger.Debug("General file sampling for %s failed: %v", storage.Name, fileSampleErr) } @@ -1086,7 +1200,7 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv if c.config.BackupPVEBackupFiles { c.logger.Info("Analyzing PVE backup files in datastore: %s", storage.Name) - if err := c.collectDetailedPVEBackups(ctx, storage, metaDir); err != nil { + if err := c.collectDetailedPVEBackups(ctx, storage, metaDir, ioTimeout); err != nil { c.logger.Warning("Detailed backup analysis for %s failed: %v", storage.Name, err) } } else { @@ -1105,7 +1219,7 @@ func (c *Collector) collectPVEStorageMetadata(ctx context.Context, storages []pv return nil } -func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveStorageEntry, metaDir string) error { +func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveStorageEntry, metaDir string, ioTimeout time.Duration) error { if err := ctx.Err(); err != nil { return err } @@ -1156,56 +1270,86 @@ func (c *Collector) collectDetailedPVEBackups(ctx context.Context, storage pveSt } } - walkErr := filepath.WalkDir(storage.Path, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - c.logger.Debug("Skipping %s: %v", path, walkErr) - return nil - } + type dirItem struct { + path string + } + stack := []dirItem{{path: storage.Path}} + + for len(stack) > 0 { if err := ctx.Err(); err != nil { return err } - if d.IsDir() { - return nil + + item := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if c.shouldExclude(item.path) { + continue } - info, err := d.Info() + entries, err := safefs.ReadDir(ctx, item.path, ioTimeout) if err != nil { - c.logger.Debug("Failed to stat %s: %v", path, err) - return nil + if errors.Is(err, safefs.ErrTimeout) { + return err + } + c.logger.Debug("Skipping %s: %v", item.path, err) + continue } - base := filepath.Base(path) - matched := false - for _, w := range writers { - if matchPattern(base, w.pattern) { - matched = true - if err := w.Write(path, info); err != nil { - c.logger.Debug("Failed to log %s for pattern %s: %v", path, w.pattern, err) + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } + + name := entry.Name() + fullPath := filepath.Join(item.path, name) + if c.shouldExclude(fullPath) { + continue + } + if entry.IsDir() { + stack = append(stack, dirItem{path: fullPath}) + continue + } + + matchedWriters := make([]*patternWriter, 0, 2) + for _, w := range writers { + if matchPattern(name, w.pattern) { + matchedWriters = append(matchedWriters, w) } } - } + if len(matchedWriters) == 0 { + continue + } - if !matched { - return nil - } + info, err := safefs.Stat(ctx, fullPath, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return err + } + c.logger.Debug("Failed to stat %s: %v", fullPath, err) + continue + } + + for _, w := range matchedWriters { + if err := w.Write(fullPath, 
info); err != nil { + c.logger.Debug("Failed to log %s for pattern %s: %v", fullPath, w.pattern, err) + } + } - totalFiles++ - totalSize += info.Size() + totalFiles++ + totalSize += info.Size() - if smallDir != "" && info.Size() <= c.config.MaxPVEBackupSizeBytes { - if err := c.copyBackupSample(ctx, path, smallDir, fmt.Sprintf("small PVE backup %s", filepath.Base(path))); err != nil { - c.logger.Debug("Failed to copy small backup %s: %v", path, err) + if smallDir != "" && info.Size() <= c.config.MaxPVEBackupSizeBytes { + if err := c.copyBackupSample(ctx, fullPath, smallDir, fmt.Sprintf("small PVE backup %s", name)); err != nil { + c.logger.Debug("Failed to copy small backup %s: %v", fullPath, err) + } } - } - if includeDir != "" && strings.Contains(path, includePattern) { - if err := c.copyBackupSample(ctx, path, includeDir, fmt.Sprintf("selected PVE backup %s", filepath.Base(path))); err != nil { - c.logger.Debug("Failed to copy pattern backup %s: %v", path, err) + if includeDir != "" && strings.Contains(fullPath, includePattern) { + if err := c.copyBackupSample(ctx, fullPath, includeDir, fmt.Sprintf("selected PVE backup %s", name)); err != nil { + c.logger.Debug("Failed to copy pattern backup %s: %v", fullPath, err) + } } } - return nil - }) - if walkErr != nil { - return walkErr } if err := c.writePatternSummary(storage, analysisDir, writers, totalFiles, totalSize); err != nil { @@ -1326,7 +1470,7 @@ func (pw *patternWriter) Write(path string, info os.FileInfo) error { func (pw *patternWriter) Close() error { var err error if pw.writer != nil { - if flushErr := pw.writer.Flush(); flushErr != nil && err == nil { + if flushErr := pw.writer.Flush(); flushErr != nil { err = flushErr } } @@ -1556,6 +1700,9 @@ func (c *Collector) copyIfExists(source, target, description string) error { } func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target string) error { + if err := ctx.Err(); err != nil { + return err + } entries, err := os.ReadDir(jobsDir) if err != nil { return err @@ -1563,6 +1710,9 @@ func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target var buffers []json.RawMessage for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } if entry.IsDir() { continue } @@ -1599,6 +1749,9 @@ func (c *Collector) aggregateBackupHistory(ctx context.Context, jobsDir, target } func (c *Collector) aggregateReplicationStatus(ctx context.Context, replicationDir, target string) error { + if err := ctx.Err(); err != nil { + return err + } entries, err := os.ReadDir(replicationDir) if err != nil { return err @@ -1606,6 +1759,9 @@ func (c *Collector) aggregateReplicationStatus(ctx context.Context, replicationD var buffers []json.RawMessage for _, entry := range entries { + if err := ctx.Err(); err != nil { + return err + } if entry.IsDir() { continue } @@ -1763,9 +1919,9 @@ func (c *Collector) parseStorageConfigEntries() []pveStorageEntry { return entries } -func (c *Collector) describeDiskUsage(path string) (string, error) { - var stat syscall.Statfs_t - if err := syscall.Statfs(path, &stat); err != nil { +func (c *Collector) describeDiskUsage(ctx context.Context, path string, ioTimeout time.Duration) (string, error) { + stat, err := safefs.Statfs(ctx, path, ioTimeout) + if err != nil { return "", err } total := int64(stat.Blocks) * int64(stat.Bsize) @@ -1781,50 +1937,64 @@ func (c *Collector) describeDiskUsage(path string) (string, error) { ), nil } -func (c *Collector) sampleMetadataFileStats(ctx context.Context, root string, 
maxDepth, limit int) ([]string, error) { +func (c *Collector) sampleMetadataFileStats(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { lines := make([]string, 0, limit) - if limit <= 0 { + if limit <= 0 || maxDepth <= 0 { return lines, nil } root = filepath.Clean(root) - stopErr := errors.New("metadata sample limit reached") + type dirItem struct { + path string + depth int + } + stack := []dirItem{{path: root, depth: 0}} - err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error { - if walkErr != nil { - return walkErr - } + for len(stack) > 0 && len(lines) < limit { if err := ctx.Err(); err != nil { - return err - } - depth := relativeDepth(root, path) - if d.IsDir() { - if depth >= maxDepth { - return filepath.SkipDir - } - return nil + return lines, err } - info, err := d.Info() + item := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, item.path, ioTimeout) if err != nil { - return nil + return lines, err } - line := fmt.Sprintf("%s %d %s", - info.ModTime().Format(time.RFC3339), - info.Size(), - path, - ) - lines = append(lines, line) - if len(lines) >= limit { - return stopErr - } - return nil - }) + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return lines, err + } - if err != nil && !errors.Is(err, stopErr) { - return lines, err + full := filepath.Join(item.path, entry.Name()) + if entry.IsDir() { + if item.depth+1 >= maxDepth { + continue + } + stack = append(stack, dirItem{path: full, depth: item.depth + 1}) + continue + } + + info, err := safefs.Stat(ctx, full, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return lines, err + } + continue + } + lines = append(lines, fmt.Sprintf("%s %d %s", + info.ModTime().Format(time.RFC3339), + info.Size(), + full, + )) + if len(lines) >= limit { + break + } + } } + return lines, nil } diff --git a/internal/backup/collector_pve_additional_test.go b/internal/backup/collector_pve_additional_test.go index 92ae4c6..5b5ef98 100644 --- a/internal/backup/collector_pve_additional_test.go +++ b/internal/backup/collector_pve_additional_test.go @@ -166,7 +166,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -201,7 +201,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() // Cancel immediately - err := collector.collectDetailedPVEBackups(ctx, storage, metaDir) + err := collector.collectDetailedPVEBackups(ctx, storage, metaDir, 0) if err == nil || err != context.Canceled { t.Errorf("expected context.Canceled error, got: %v", err) } @@ -229,7 +229,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -266,7 +266,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 
0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -305,7 +305,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -339,7 +339,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -368,7 +368,7 @@ func TestCollectDetailedPVEBackups(t *testing.T) { t.Fatalf("MkdirAll: %v", err) } - err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir) + err := collector.collectDetailedPVEBackups(context.Background(), storage, metaDir, 0) if err != nil { t.Fatalf("collectDetailedPVEBackups error: %v", err) } @@ -504,7 +504,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -535,7 +535,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 2, 100) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 2, 100, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -554,7 +554,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 10, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } @@ -577,7 +577,7 @@ func TestSampleMetadataFileStats(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() - _, err := collector.sampleMetadataFileStats(ctx, tmpDir, 3, 10) + _, err := collector.sampleMetadataFileStats(ctx, tmpDir, 3, 10, 0) if err == nil { t.Error("expected context cancelled error") } @@ -593,7 +593,7 @@ func TestSampleMetadataFileStats(t *testing.T) { cfg := GetDefaultCollectorConfig() collector := NewCollector(logger, cfg, tmpDir, "pve", false) - lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 0) + lines, err := collector.sampleMetadataFileStats(context.Background(), tmpDir, 3, 0, 0) if err != nil { t.Fatalf("sampleMetadataFileStats error: %v", err) } diff --git a/internal/backup/collector_pve_parse_test.go b/internal/backup/collector_pve_parse_test.go index 8e3b96c..aaea755 100644 --- a/internal/backup/collector_pve_parse_test.go +++ b/internal/backup/collector_pve_parse_test.go @@ -150,6 +150,29 @@ func TestParseNodeStorageList(t *testing.T) { } } +func TestParseNodeStorageList_RuntimeFields(t *testing.T) { + input := `[ + {"storage": "nfs1", "path": "/mnt/nfs", "type": "nfs", "content": "backup", 
"active": 1, "enabled": 0, "status": "available"} + ]` + result, err := parseNodeStorageList([]byte(input)) + if err != nil { + t.Fatalf("parseNodeStorageList() unexpected error = %v", err) + } + if len(result) != 1 { + t.Fatalf("parseNodeStorageList() returned %d entries, want 1", len(result)) + } + got := result[0] + if got.Active == nil || *got.Active != true { + t.Fatalf("entry.Active = %#v; want true", got.Active) + } + if got.Enabled == nil || *got.Enabled != false { + t.Fatalf("entry.Enabled = %#v; want false", got.Enabled) + } + if got.Status != "available" { + t.Fatalf("entry.Status = %q; want %q", got.Status, "available") + } +} + // TestParseStorageConfigEntries tests parsing PVE storage.cfg file func TestParseStorageConfigEntries(t *testing.T) { tests := []struct { diff --git a/internal/backup/collector_pxar_datastore_test.go b/internal/backup/collector_pxar_datastore_test.go index 6810fe8..ee63819 100644 --- a/internal/backup/collector_pxar_datastore_test.go +++ b/internal/backup/collector_pxar_datastore_test.go @@ -37,7 +37,7 @@ func TestWritePxarListReportWithFiles(t *testing.T) { ds := pbsDatastore{Name: "ds1", Path: filepath.Join(tmp, "ds1")} target := filepath.Join(tmp, "list.txt") c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarListReport(target, ds, "ct"); err != nil { + if err := c.writePxarListReport(context.Background(), target, ds, "ct", 0); err != nil { t.Fatalf("writePxarListReport: %v", err) } content, err := os.ReadFile(target) diff --git a/internal/backup/collector_pxar_reports_test.go b/internal/backup/collector_pxar_reports_test.go index 4439d25..e22d903 100644 --- a/internal/backup/collector_pxar_reports_test.go +++ b/internal/backup/collector_pxar_reports_test.go @@ -1,6 +1,7 @@ package backup import ( + "context" "os" "path/filepath" "testing" @@ -16,7 +17,7 @@ func TestWritePxarSubdirReportHandlesMissingPath(t *testing.T) { ds := pbsDatastore{Name: "ds1", Path: filepath.Join(tmp, "missing")} c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarSubdirReport(target, ds); err != nil { + if err := c.writePxarSubdirReport(context.Background(), target, ds, 0); err != nil { t.Fatalf("writePxarSubdirReport error: %v", err) } content, err := os.ReadFile(target) @@ -39,7 +40,7 @@ func TestWritePxarListReportNoFiles(t *testing.T) { target := filepath.Join(tmp, "list.txt") c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), tmp, types.ProxmoxBS, false) - if err := c.writePxarListReport(target, ds, "vm"); err != nil { + if err := c.writePxarListReport(context.Background(), target, ds, "vm", 0); err != nil { t.Fatalf("writePxarListReport error: %v", err) } content, err := os.ReadFile(target) diff --git a/internal/backup/collector_pxar_roots_test.go b/internal/backup/collector_pxar_roots_test.go deleted file mode 100644 index a03348d..0000000 --- a/internal/backup/collector_pxar_roots_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package backup - -import ( - "context" - "os" - "path/filepath" - "testing" - - "github.com/tis24dev/proxsave/internal/types" -) - -func TestComputePxarWorkerRootsCachesResults(t *testing.T) { - root := t.TempDir() - for _, p := range []string{"a/one", "b", "c/d"} { - if err := os.MkdirAll(filepath.Join(root, p), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", p, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 2 - cfg.PxarScanMaxRoots = 2 - c := NewCollector(newTestLogger(), 
cfg, root, types.ProxmoxBS, false) - - ctx := context.Background() - first, err := c.computePxarWorkerRoots(ctx, root, "test") - if err != nil { - t.Fatalf("computePxarWorkerRoots error: %v", err) - } - if len(first) == 0 || len(first) > 2 { - t.Fatalf("unexpected roots count: %d", len(first)) - } - - // Remove the directory to ensure cached results are used. - os.RemoveAll(root) - second, err := c.computePxarWorkerRoots(ctx, root, "test") - if err != nil { - t.Fatalf("computePxarWorkerRoots (cached) error: %v", err) - } - if len(second) != len(first) { - t.Fatalf("cached results length mismatch: %d vs %d", len(second), len(first)) - } -} diff --git a/internal/backup/collector_pxar_test.go b/internal/backup/collector_pxar_test.go deleted file mode 100644 index 1626c1f..0000000 --- a/internal/backup/collector_pxar_test.go +++ /dev/null @@ -1,729 +0,0 @@ -package backup - -import ( - "context" - "fmt" - "os" - "path/filepath" - "reflect" - "strings" - "testing" - - "github.com/tis24dev/proxsave/internal/logging" - "github.com/tis24dev/proxsave/internal/types" -) - -type closedDoneContext struct { - context.Context - done chan struct{} - err error -} - -func newClosedDoneContext(err error) *closedDoneContext { - ch := make(chan struct{}) - close(ch) - return &closedDoneContext{ - Context: context.Background(), - done: ch, - err: err, - } -} - -func (c *closedDoneContext) Done() <-chan struct{} { return c.done } -func (c *closedDoneContext) Err() error { return c.err } - -func TestDownsampleRoots(t *testing.T) { - roots := []string{"a", "b", "c", "d"} - if got := downsampleRoots(roots, 0); !reflect.DeepEqual(got, roots) { - t.Fatalf("limit=0 should return original slice") - } - limited := downsampleRoots(roots, 2) - if len(limited) != 2 { - t.Fatalf("expected limited slice len 2, got %d", len(limited)) - } - seen := map[string]bool{} - for _, r := range limited { - if seen[r] { - t.Fatalf("duplicate in downsampled roots: %s", r) - } - seen[r] = true - } -} - -func TestDeterministicShuffleAndSeed(t *testing.T) { - items := []string{"one", "two", "three"} - seed := deterministicSeed("a", "b") - seed2 := deterministicSeed("a", "b", "c") - if seed == seed2 { - t.Fatalf("different seed inputs should differ") - } - - first := append([]string(nil), items...) - second := append([]string(nil), items...) 
- shuffleStringsDeterministic(first, seed) - shuffleStringsDeterministic(second, seed) - if !reflect.DeepEqual(first, second) { - t.Fatalf("shuffle with same seed should be deterministic") - } -} - -func TestPxarRootSelector(t *testing.T) { - sel := newPxarRootSelector(2) - for _, p := range []string{"a", "b", "c", "d"} { - sel.consider(p) - } - results := sel.results() - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - if sel.total != 4 || !sel.capped { - t.Fatalf("selector total=%d capped=%v want total=4 capped=true", sel.total, sel.capped) - } -} - -func TestPxarRootSelectorLimitZeroReturnsAllUnique(t *testing.T) { - sel := newPxarRootSelector(0) - for _, p := range []string{"a", "a", "b"} { - sel.consider(p) - } - results := sel.results() - if len(results) != 2 { - t.Fatalf("expected unique results, got %v", results) - } -} - -func TestPxarRootSelectorSkipsReplacementForHighWeightCandidate(t *testing.T) { - p1, p2 := "a", "b" - if hashPath(p1) > hashPath(p2) { - p1, p2 = p2, p1 - } - - sel := newPxarRootSelector(1) - sel.consider(p1) - sel.consider(p2) // higher weight => should be ignored - - results := sel.results() - if len(results) != 1 || results[0] != p1 { - t.Fatalf("expected selector to keep low-weight %q, got %v", p1, results) - } - if !sel.capped { - t.Fatalf("expected selector capped=true") - } -} - -func TestRecomputeMaxHandlesEmptyItems(t *testing.T) { - sel := newPxarRootSelector(1) - sel.items = nil - sel.recomputeMax() - if sel.maxIdx != -1 || sel.maxWeight != 0 { - t.Fatalf("unexpected recompute state: maxIdx=%d maxWeight=%d", sel.maxIdx, sel.maxWeight) - } -} - -func TestHashPathAndUniquePaths(t *testing.T) { - if hashPath("foo") == hashPath("bar") { - t.Fatalf("expected different hashes for different inputs") - } - paths := []string{"a", "a", "b"} - unique := uniquePaths(paths) - if len(unique) != 2 || unique[0] != "a" || unique[1] != "b" { - t.Fatalf("uniquePaths failed: %#v", unique) - } -} - -func TestUniquePathsEmptyInput(t *testing.T) { - if got := uniquePaths(nil); got != nil { - t.Fatalf("expected nil, got %#v", got) - } - if got := uniquePaths([]string{}); len(got) != 0 { - t.Fatalf("expected empty slice, got %#v", got) - } -} - -func TestDownsampleRootsStepOneReturnsPrefix(t *testing.T) { - roots := []string{"a", "b", "c"} - got := downsampleRoots(roots, 2) - if !reflect.DeepEqual(got, []string{"a", "b"}) { - t.Fatalf("expected prefix, got %#v", got) - } -} - -func TestSampleFilesRespectsPatternsAndLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - mk := func(rel, content string) { - path := filepath.Join(root, rel) - _ = os.MkdirAll(filepath.Dir(path), 0o755) - _ = os.WriteFile(path, []byte(content), 0o640) - } - mk("keep1.txt", "data") - mk("skip.log", "data") - mk(filepath.Join("nested", "keep2.txt"), "data") - - ctx := context.Background() - include := []string{"*.txt"} - exclude := []string{"skip*"} - - results, err := c.sampleFiles(ctx, root, include, exclude, 3, 2) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - if len(results) != 2 { - t.Fatalf("expected 2 results (limit), got %d", len(results)) - } - for _, r := range results { - if filepath.Ext(r.RelativePath) != ".txt" { - t.Fatalf("unexpected file in results: %+v", r) - } - if r.SizeHuman == "" { - t.Fatalf("SizeHuman should be set") - } - } -} - -func TestSampleFilesLimitZeroReturnsEmpty(t *testing.T) { - root 
:= t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 0) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleFilesReadDirErrorPropagates(t *testing.T) { - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - _, err := c.sampleFiles(context.Background(), filepath.Join(t.TempDir(), "missing"), nil, nil, 3, 1) - if err == nil { - t.Fatalf("expected error for missing root") - } -} - -func TestSampleFilesLimitTriggersDuringTopLevelScan(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - for i := 0; i < 5; i++ { - name := fmt.Sprintf("file-%d.txt", i) - if err := os.WriteFile(filepath.Join(root, name), []byte("x"), 0o644); err != nil { - t.Fatalf("write %s: %v", name, err) - } - } - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 1 { - t.Fatalf("expected 1 result due to limit, got %d", len(results)) - } -} - -func TestSampleFilesReturnsWhenNoWorkerRoots(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxBS, false) - - if err := os.WriteFile(filepath.Join(root, "top.txt"), []byte("x"), 0o644); err != nil { - t.Fatalf("write top.txt: %v", err) - } - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 1 { - t.Fatalf("expected 1 result, got %d", len(results)) - } -} - -func TestSampleFilesUsesDefaultWorkerLimitWhenZero(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.PxarIntraConcurrency = 0 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.MkdirAll(filepath.Join(root, "nested"), 0o755); err != nil { - t.Fatalf("mkdir nested: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "nested", "n.txt"), []byte("x"), 0o644); err != nil { - t.Fatalf("write n.txt: %v", err) - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) == 0 { - t.Fatalf("expected results from nested walk") - } -} - -func TestSampleFilesSkipsCollectorExcludeAndNonMatchingIncludeAndBrokenSymlinkInfo(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"excluded.txt"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.WriteFile(filepath.Join(root, "keep.txt"), []byte("ok"), 0o644); err != nil { - t.Fatalf("write keep.txt: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "excluded.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write excluded.txt: %v", err) - } - if 
err := os.WriteFile(filepath.Join(root, "skip.log"), []byte("log"), 0o644); err != nil { - t.Fatalf("write skip.log: %v", err) - } - if err := os.Symlink("missing-target", filepath.Join(root, "broken")); err != nil { - t.Fatalf("symlink broken: %v", err) - } - - results, err := c.sampleFiles(context.Background(), root, []string{"*.txt"}, nil, 3, 10) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - if len(results) != 1 || results[0].RelativePath != "keep.txt" { - t.Fatalf("expected only keep.txt, got %#v", results) - } -} - -func TestSampleFilesSkipsExcludedDirsAndRespectsMaxDepthInWorkerWalk(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - if err := os.MkdirAll(filepath.Join(root, "skip", "inner"), 0o755); err != nil { - t.Fatalf("mkdir skip/inner: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "skip", "inner", "skip.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write skip file: %v", err) - } - - if err := os.MkdirAll(filepath.Join(root, "deep", "inner"), 0o755); err != nil { - t.Fatalf("mkdir deep/inner: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "deep", "ok.txt"), []byte("ok"), 0o644); err != nil { - t.Fatalf("write deep/ok.txt: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "deep", "inner", "too-deep.txt"), []byte("no"), 0o644); err != nil { - t.Fatalf("write deep/inner/too-deep.txt: %v", err) - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - results, err := c.sampleFiles(context.Background(), root, nil, nil, 1, 10) - if err != nil { - t.Fatalf("sampleFiles error: %v", err) - } - - paths := make([]string, 0, len(results)) - for _, r := range results { - paths = append(paths, r.RelativePath) - } - if !reflect.DeepEqual(paths, []string{"deep/ok.txt"}) { - t.Fatalf("expected only deep/ok.txt due to exclusions and maxDepth, got %v", paths) - } -} - -func TestSampleDirectoriesDepthAndLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - makeDir := func(rel string) { - _ = os.MkdirAll(filepath.Join(root, rel), 0o755) - } - makeDir("a/b") - makeDir("c") - makeDir("d/e/f") - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - ctx := context.Background() - dirs, err := c.sampleDirectories(ctx, root, 1, 2) - if err != nil { - t.Fatalf("sampleDirectories error: %v", err) - } - if len(dirs) != 2 { - t.Fatalf("expected limit 2, got %d", len(dirs)) - } - for _, d := range dirs { - if strings.Count(d, "/") > 0 { - t.Fatalf("expected depth < 1, got %s", d) - } - } -} - -func TestComputePxarWorkerRootsFallbackToIntermediateLevelAndDownsamples(t *testing.T) { - root := t.TempDir() - for _, p := range []string{ - "a/a1", - "a/a2", - "b/b1", - "c/c1", - "d/d1", - } { - if err := os.MkdirAll(filepath.Join(root, p), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", p, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 3 - cfg.PxarScanMaxRoots = 2 - cfg.PxarEnumWorkers = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - ctx 
:= context.Background() - roots, err := c.computePxarWorkerRoots(ctx, root, "fallback-test") - if err != nil { - t.Fatalf("computePxarWorkerRoots error: %v", err) - } - if len(roots) != 2 { - t.Fatalf("expected downsampled roots len 2, got %d (%v)", len(roots), roots) - } - for _, r := range roots { - if _, err := os.Stat(r); err != nil { - t.Fatalf("expected root to exist (%s): %v", r, err) - } - rel, err := filepath.Rel(root, r) - if err != nil { - t.Fatalf("rel error: %v", err) - } - if strings.Count(rel, string(filepath.Separator)) != 1 { - t.Fatalf("expected fallback roots at depth 2, got %s (rel=%s)", r, rel) - } - } -} - -func TestSampleDirectoriesLimitZeroReturnsEmpty(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - results, err := c.sampleDirectories(context.Background(), root, 2, 0) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleDirectoriesReturnsWhenNoWorkerRoots(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - results, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(results) != 0 { - t.Fatalf("expected empty result, got %d", len(results)) - } -} - -func TestSampleDirectoriesStopsAtLimit(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), root, types.ProxmoxVE, false) - - for _, d := range []string{"a", "b", "c"} { - if err := os.MkdirAll(filepath.Join(root, d, "x"), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 3, 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(dirs) != 1 { - t.Fatalf("expected 1 result due to limit, got %d", len(dirs)) - } -} - -func TestSampleDirectoriesReturnsErrorWhenWorkerStartDirMissing(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{filepath.Join(root, "missing")} - - _, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err == nil { - t.Fatalf("expected error when startDir is missing") - } -} - -func TestSampleDirectoriesUsesDefaultWorkerLimitAndSkipsExcludedDirs(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.PxarIntraConcurrency = 0 - // shouldExclude() tests patterns against multiple "candidates" including the basename, - // so using "skip" reliably excludes the directory itself and thus its subtree via SkipDir. 
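The comment above states the contract the removed tests leaned on: exclusion patterns are checked against several candidate strings, including the directory basename, not only the full path. The real shouldExclude/matchAnyPattern helpers are not part of this diff, so the following is only a sketch of the assumed reduction to filepath.Match over basename and root-relative path (it presumes path/filepath is imported):

// Hypothetical sketch: a pattern like "skip" matches the directory's basename,
// which is why excluding "skip" prunes the whole subtree once the walk returns
// SkipDir for that directory.
func matchAny(patterns []string, base, rel string) bool {
	for _, pattern := range patterns {
		for _, candidate := range []string{base, rel} {
			if ok, err := filepath.Match(pattern, candidate); err == nil && ok {
				return true
			}
		}
	}
	return false
}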
- cfg.ExcludePatterns = []string{"skip"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) - - for _, d := range []string{"keep/inner", "skip/inner"} { - if err := os.MkdirAll(filepath.Join(root, d), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 3, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - for _, d := range dirs { - if strings.HasPrefix(d, "skip") { - t.Fatalf("expected excluded directories to be skipped, got %v", dirs) - } - } -} - -func TestSampleDirectoriesReturnsNilOnCanceledContextWithoutStartingWorkers(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - dirs, err := c.sampleDirectories(ctx, root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(dirs) != 0 { - t.Fatalf("expected empty result, got %v", dirs) - } -} - -func TestSampleDirectoriesReturnsContextErrorWhenNotCanceled(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - errBoom := fmt.Errorf("boom") - _, err := c.sampleDirectories(newClosedDoneContext(errBoom), root, 2, 10) - if err == nil || err.Error() != errBoom.Error() { - t.Fatalf("expected %v, got %v", errBoom, err) - } -} - -func TestSampleDirectoriesSkipsExcludedFiles(t *testing.T) { - root := t.TempDir() - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip.txt"} - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxVE, false) - - if err := os.MkdirAll(filepath.Join(root, "keep"), 0o755); err != nil { - t.Fatalf("mkdir keep: %v", err) - } - if err := os.WriteFile(filepath.Join(root, "skip.txt"), []byte("nope"), 0o644); err != nil { - t.Fatalf("write skip.txt: %v", err) - } - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{root} - - dirs, err := c.sampleDirectories(context.Background(), root, 2, 10) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - foundKeep := false - for _, d := range dirs { - if d == "keep" { - foundKeep = true - } - } - if !foundKeep { - t.Fatalf("expected keep in results, got %v", dirs) - } -} - -func TestComputePxarWorkerRootsNormalizesDefaults(t *testing.T) { - root := t.TempDir() - if err := os.MkdirAll(filepath.Join(root, "a"), 0o755); err != nil { - t.Fatalf("mkdir a: %v", err) - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 0 - 
cfg.PxarScanMaxRoots = 0 - cfg.PxarEnumWorkers = 0 - cfg.PxarEnumBudgetMs = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "defaults") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) == 0 { - t.Fatalf("expected some roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsReturnsNilWhenNoDirsFound(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "empty") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsCapsAndSkipsExcludedChildren(t *testing.T) { - root := t.TempDir() - for _, d := range []string{"keep1", "keep2", "skip"} { - if err := os.MkdirAll(filepath.Join(root, d), 0o755); err != nil { - t.Fatalf("mkdir %s: %v", d, err) - } - } - - cfg := GetDefaultCollectorConfig() - cfg.ExcludePatterns = []string{"skip"} - cfg.PxarScanFanoutLevel = 1 - cfg.PxarScanMaxRoots = 1 - cfg.PxarEnumWorkers = 1 - cfg.PxarStopOnCap = false - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "cap-test") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) != 1 { - t.Fatalf("expected 1 root due to cap, got %v", roots) - } - for _, r := range roots { - if strings.Contains(r, string(filepath.Separator)+"skip") || filepath.Base(r) == "skip" { - t.Fatalf("expected excluded dir not to be returned, got %v", roots) - } - } -} - -func TestComputePxarWorkerRootsBudgetExceededReturnsNil(t *testing.T) { - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 2 - cfg.PxarEnumBudgetMs = 1 - - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(newClosedDoneContext(context.DeadlineExceeded), t.TempDir(), "budget-test") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsDebugProgressStopsOnChannelClose(t *testing.T) { - root := t.TempDir() - if err := os.MkdirAll(filepath.Join(root, "a"), 0o755); err != nil { - t.Fatalf("mkdir a: %v", err) - } - - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 1 - cfg.PxarScanMaxRoots = 1 - cfg.PxarEnumWorkers = 1 - - logger := logging.New(types.LogLevelDebug, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(context.Background(), root, "debug-progress") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(roots) == 0 { - t.Fatalf("expected some roots, got %v", roots) - } -} - -func TestComputePxarWorkerRootsDebugProgressStopsOnCtxDone(t *testing.T) { - cfg := GetDefaultCollectorConfig() - cfg.PxarScanFanoutLevel = 1 - - logger := logging.New(types.LogLevelDebug, false) - c := NewCollector(logger, cfg, t.TempDir(), types.ProxmoxBS, false) - - roots, err := c.computePxarWorkerRoots(newClosedDoneContext(context.DeadlineExceeded), t.TempDir(), "debug-ctxdone") - if err != nil { - 
t.Fatalf("unexpected error: %v", err) - } - if roots != nil { - t.Fatalf("expected nil roots, got %v", roots) - } -} - -func TestSampleFilesReturnsErrorWhenWorkerStartDirMissing(t *testing.T) { - root := t.TempDir() - logger := logging.New(types.LogLevelError, false) - c := NewCollector(logger, GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) - - key := fmt.Sprintf("%s|fanout=%d|max=%d", root, c.config.PxarScanFanoutLevel, c.config.PxarScanMaxRoots) - c.rootsCache[key] = []string{filepath.Join(root, "missing")} - - _, err := c.sampleFiles(context.Background(), root, nil, nil, 3, 10) - if err == nil { - t.Fatalf("expected error when startDir is missing") - } -} diff --git a/internal/backup/fs_sampling_bounded.go b/internal/backup/fs_sampling_bounded.go new file mode 100644 index 0000000..7755115 --- /dev/null +++ b/internal/backup/fs_sampling_bounded.go @@ -0,0 +1,148 @@ +package backup + +import ( + "context" + "errors" + "path/filepath" + "strings" + "time" + + "github.com/tis24dev/proxsave/internal/safefs" +) + +func (c *Collector) sampleDirectoriesBounded(ctx context.Context, root string, maxDepth, limit int, ioTimeout time.Duration) ([]string, error) { + results := make([]string, 0, limit) + if limit <= 0 || maxDepth <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + if !entry.IsDir() { + continue + } + child := filepath.Join(dirPath, entry.Name()) + if c.shouldExclude(child) { + continue + } + + rel, relErr := filepath.Rel(root, child) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + + results = append(results, rel) + if len(results) >= limit { + break + } + if depth < maxDepth-1 { + stack = append(stack, child) + } + } + } + + return results, nil +} + +func (c *Collector) sampleFilesBounded(ctx context.Context, root string, includePatterns, excludePatterns []string, maxDepth, limit int, ioTimeout time.Duration) ([]FileSummary, error) { + results := make([]FileSummary, 0, limit) + if limit <= 0 { + return results, nil + } + + root = filepath.Clean(root) + stack := []string{root} + + for len(stack) > 0 && len(results) < limit { + if err := ctx.Err(); err != nil { + return results, err + } + dirPath := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + entries, err := safefs.ReadDir(ctx, dirPath, ioTimeout) + if err != nil { + return results, err + } + + for _, entry := range entries { + if err := ctx.Err(); err != nil { + return results, err + } + + name := entry.Name() + full := filepath.Join(dirPath, name) + if c.shouldExclude(full) { + continue + } + + if entry.IsDir() { + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." || strings.HasPrefix(rel, "..") { + continue + } + rel = filepath.ToSlash(rel) + depth := strings.Count(rel, "/") + if depth >= maxDepth { + continue + } + stack = append(stack, full) + continue + } + + rel, relErr := filepath.Rel(root, full) + if relErr != nil || rel == "." 
|| strings.HasPrefix(rel, "..") { + continue + } + + if len(excludePatterns) > 0 && matchAnyPattern(excludePatterns, name, rel) { + continue + } + if len(includePatterns) > 0 && !matchAnyPattern(includePatterns, name, rel) { + continue + } + + info, err := safefs.Stat(ctx, full, ioTimeout) + if err != nil { + if errors.Is(err, safefs.ErrTimeout) { + return results, err + } + continue + } + + results = append(results, FileSummary{ + RelativePath: filepath.ToSlash(rel), + SizeBytes: info.Size(), + SizeHuman: FormatBytes(info.Size()), + ModTime: info.ModTime(), + }) + if len(results) >= limit { + break + } + } + } + + return results, nil +} diff --git a/internal/backup/fs_sampling_bounded_test.go b/internal/backup/fs_sampling_bounded_test.go new file mode 100644 index 0000000..096f0a4 --- /dev/null +++ b/internal/backup/fs_sampling_bounded_test.go @@ -0,0 +1,137 @@ +package backup + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/tis24dev/proxsave/internal/types" +) + +func TestSampleDirectoriesBoundedRespectsDepthAndLimit(t *testing.T) { + root := t.TempDir() + for _, rel := range []string{ + filepath.Join("a", "b"), + "c", + filepath.Join("d", "e", "f"), + } { + if err := os.MkdirAll(filepath.Join(root, rel), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", rel, err) + } + } + + c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxVE, false) + + dirs, err := c.sampleDirectoriesBounded(context.Background(), root, 1, 10, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + if len(dirs) != 3 { + t.Fatalf("expected 3 top-level dirs, got %v", dirs) + } + for _, d := range dirs { + if strings.Contains(d, "/") { + t.Fatalf("expected top-level dir, got %q", d) + } + } + + dirs, err = c.sampleDirectoriesBounded(context.Background(), root, 2, 20, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + want := map[string]bool{ + "a": true, + "a/b": true, + "c": true, + "d": true, + "d/e": true, + } + for _, got := range dirs { + delete(want, got) + if got == "d/e/f" { + t.Fatalf("unexpected deep dir %q in results: %v", got, dirs) + } + } + if len(want) != 0 { + t.Fatalf("missing expected directories: %#v (got %v)", want, dirs) + } + + limited, err := c.sampleDirectoriesBounded(context.Background(), root, 1, 2, 0) + if err != nil { + t.Fatalf("sampleDirectoriesBounded error: %v", err) + } + if len(limited) != 2 { + t.Fatalf("expected limit=2 results, got %v", limited) + } +} + +func TestSampleFilesBoundedRespectsPatternsExcludeAndDepth(t *testing.T) { + root := t.TempDir() + write := func(rel, content string) { + path := filepath.Join(root, rel) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", filepath.Dir(rel), err) + } + if err := os.WriteFile(path, []byte(content), 0o640); err != nil { + t.Fatalf("write %s: %v", rel, err) + } + } + + write("keep1.txt", "data") + write("excluded.txt", "data") + write("skip_me.txt", "data") + write(filepath.Join("nested", "keep2.txt"), "data") + write(filepath.Join("nested", "deep", "keep3.txt"), "data") + + cfg := GetDefaultCollectorConfig() + cfg.ExcludePatterns = []string{"excluded.txt"} + c := NewCollector(newTestLogger(), cfg, t.TempDir(), types.ProxmoxBS, false) + + include := []string{"*.txt"} + exclude := []string{"skip*"} + results, err := c.sampleFilesBounded(context.Background(), root, include, exclude, 1, 50, 0) + if err != nil { + t.Fatalf("sampleFilesBounded error: %v", err) + } + + got 
:= map[string]FileSummary{} + for _, r := range results { + got[r.RelativePath] = r + if strings.Contains(r.RelativePath, `\`) { + t.Fatalf("expected forward-slash relative path, got %q", r.RelativePath) + } + if r.SizeHuman == "" || r.SizeBytes <= 0 { + t.Fatalf("expected populated size fields, got %+v", r) + } + } + + if _, ok := got["keep1.txt"]; !ok { + t.Fatalf("expected keep1.txt in results: %v", results) + } + if _, ok := got["nested/keep2.txt"]; !ok { + t.Fatalf("expected nested/keep2.txt in results: %v", results) + } + if _, ok := got["excluded.txt"]; ok { + t.Fatalf("expected excluded.txt to be skipped: %v", results) + } + if _, ok := got["skip_me.txt"]; ok { + t.Fatalf("expected skip_me.txt to be excluded by pattern: %v", results) + } + if _, ok := got["nested/deep/keep3.txt"]; ok { + t.Fatalf("expected nested/deep/keep3.txt to be skipped due to maxDepth: %v", results) + } +} + +func TestSampleFilesBoundedLimitZeroReturnsEmpty(t *testing.T) { + root := t.TempDir() + c := NewCollector(newTestLogger(), GetDefaultCollectorConfig(), t.TempDir(), types.ProxmoxBS, false) + results, err := c.sampleFilesBounded(context.Background(), root, nil, nil, 2, 0, 0) + if err != nil { + t.Fatalf("sampleFilesBounded error: %v", err) + } + if len(results) != 0 { + t.Fatalf("expected empty results, got %v", results) + } +} diff --git a/internal/backup/optimizations.go b/internal/backup/optimizations.go index c4e8892..f31943f 100644 --- a/internal/backup/optimizations.go +++ b/internal/backup/optimizations.go @@ -9,7 +9,6 @@ import ( "io" "os" "path/filepath" - "sort" "strings" "github.com/tis24dev/proxsave/internal/logging" @@ -310,7 +309,37 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma } logger.Debug("Prefiltering files under %s (max size %d bytes)", root, maxSize) - var processed int + type prefilterStats struct { + scanned int + optimized int + skippedStructured int + skippedSymlink int + } + var stats prefilterStats + + isStructuredConfigPath := func(path string) bool { + rel, err := filepath.Rel(root, path) + if err != nil { + return false + } + rel = filepath.ToSlash(filepath.Clean(rel)) + rel = strings.TrimPrefix(rel, "./") + switch { + case strings.HasPrefix(rel, "etc/proxmox-backup/"): + return true + case strings.HasPrefix(rel, "etc/pve/"): + return true + case strings.HasPrefix(rel, "etc/ssh/"): + return true + case strings.HasPrefix(rel, "etc/pam.d/"): + return true + case strings.HasPrefix(rel, "etc/systemd/system/"): + return true + default: + return false + } + } + err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { if err != nil { return err @@ -322,27 +351,39 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma return nil } - info, err := d.Info() - if err != nil { + info, err := os.Lstat(path) + if err != nil || info == nil { + return nil + } + if info.Mode()&os.ModeSymlink != 0 { + stats.skippedSymlink++ + return nil + } + if !info.Mode().IsRegular() { return nil } if info.Size() == 0 || info.Size() > maxSize { return nil } + stats.scanned++ ext := strings.ToLower(filepath.Ext(path)) switch ext { case ".txt", ".log", ".md": - if err := normalizeTextFile(path); err == nil { - processed++ + if changed, err := normalizeTextFile(path); err == nil && changed { + stats.optimized++ } case ".conf", ".cfg", ".ini": - if err := normalizeConfigFile(path); err == nil { - processed++ + if isStructuredConfigPath(path) { + stats.skippedStructured++ + return nil + } + if changed, err := 
normalizeConfigFile(path); err == nil && changed { + stats.optimized++ } case ".json": - if err := minifyJSON(path); err == nil { - processed++ + if changed, err := minifyJSON(path); err == nil && changed { + stats.optimized++ } } return nil @@ -352,52 +393,43 @@ func prefilterFiles(ctx context.Context, logger *logging.Logger, root string, ma return fmt.Errorf("prefilter walk failed: %w", err) } - logger.Info("Prefilter completed: %d files optimized", processed) + logger.Info("Prefilter completed: optimized=%d scanned=%d skipped_structured=%d skipped_symlink=%d", stats.optimized, stats.scanned, stats.skippedStructured, stats.skippedSymlink) return nil } -func normalizeTextFile(path string) error { +func normalizeTextFile(path string) (bool, error) { data, err := os.ReadFile(path) if err != nil { - return err + return false, err } normalized := bytes.ReplaceAll(data, []byte("\r"), nil) if bytes.Equal(data, normalized) { - return nil + return false, nil } - return os.WriteFile(path, normalized, defaultChunkFilePerm) + return true, os.WriteFile(path, normalized, defaultChunkFilePerm) } -func normalizeConfigFile(path string) error { - data, err := os.ReadFile(path) - if err != nil { - return err - } - lines := strings.Split(string(data), "\n") - filtered := lines[:0] - for _, line := range lines { - line = strings.TrimSpace(line) - if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, ";") { - continue - } - filtered = append(filtered, line) - } - sort.Strings(filtered) - return os.WriteFile(path, []byte(strings.Join(filtered, "\n")), defaultChunkFilePerm) +func normalizeConfigFile(path string) (bool, error) { + // Config files can be whitespace/ordering-sensitive (e.g. section headers). + // Only perform safe, semantic-preserving normalization here. 
+ return normalizeTextFile(path) } -func minifyJSON(path string) error { +func minifyJSON(path string) (bool, error) { data, err := os.ReadFile(path) if err != nil { - return err + return false, err } var tmp any if err := json.Unmarshal(data, &tmp); err != nil { - return err + return false, err } minified, err := json.Marshal(tmp) if err != nil { - return err + return false, err + } + if bytes.Equal(bytes.TrimSpace(data), minified) { + return false, nil } - return os.WriteFile(path, minified, defaultChunkFilePerm) + return true, os.WriteFile(path, minified, defaultChunkFilePerm) } diff --git a/internal/backup/optimizations_helpers_test.go b/internal/backup/optimizations_helpers_test.go index 2c8032e..7c6fffa 100644 --- a/internal/backup/optimizations_helpers_test.go +++ b/internal/backup/optimizations_helpers_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" ) @@ -46,8 +47,10 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { if err := os.WriteFile(textPath, []byte("line1\r\nline2\r\n"), 0o640); err != nil { t.Fatalf("write text: %v", err) } - if err := normalizeTextFile(textPath); err != nil { + if changed, err := normalizeTextFile(textPath); err != nil { t.Fatalf("normalizeTextFile: %v", err) + } else if !changed { + t.Fatalf("expected text to be normalized") } data, _ := os.ReadFile(textPath) if bytes.Contains(data, []byte("\r")) { @@ -55,24 +58,31 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { } cfgPath := filepath.Join(tmp, "app.conf") - cfgContent := "#comment\nz=1\n\n;ignored\na=2\n" + cfgContent := "#comment\r\nz=1\r\n\r\n;ignored\r\na=2\r\n" if err := os.WriteFile(cfgPath, []byte(cfgContent), 0o640); err != nil { t.Fatalf("write conf: %v", err) } - if err := normalizeConfigFile(cfgPath); err != nil { + if changed, err := normalizeConfigFile(cfgPath); err != nil { t.Fatalf("normalizeConfigFile: %v", err) + } else if !changed { + t.Fatalf("expected config to be normalized") } cfgData, _ := os.ReadFile(cfgPath) - if string(cfgData) != "a=2\nz=1" { - t.Fatalf("config not normalized/sorted, got %q", cfgData) + if bytes.Contains(cfgData, []byte("\r")) { + t.Fatalf("expected CR removed from config, got %q", cfgData) + } + if string(cfgData) != strings.ReplaceAll(cfgContent, "\r", "") { + t.Fatalf("config contents changed unexpectedly, got %q", cfgData) } jsonPath := filepath.Join(tmp, "data.json") if err := os.WriteFile(jsonPath, []byte("{\n \"a\": 1,\n \"b\": 2\n}\n"), 0o640); err != nil { t.Fatalf("write json: %v", err) } - if err := minifyJSON(jsonPath); err != nil { + if changed, err := minifyJSON(jsonPath); err != nil { t.Fatalf("minifyJSON: %v", err) + } else if !changed { + t.Fatalf("expected JSON to be minified") } jdata, _ := os.ReadFile(jsonPath) if bytes.Contains(jdata, []byte(" ")) || bytes.Contains(jdata, []byte("\n")) { @@ -82,7 +92,7 @@ func TestNormalizeTextFileAndConfigAndJSON(t *testing.T) { if err := os.WriteFile(jsonPath, []byte("{invalid"), 0o640); err != nil { t.Fatalf("write invalid json: %v", err) } - if err := minifyJSON(jsonPath); err == nil { + if _, err := minifyJSON(jsonPath); err == nil { t.Fatalf("expected error for invalid json") } } @@ -95,8 +105,10 @@ func TestMinifyJSONKeepsData(t *testing.T) { if err := os.WriteFile(path, payload, 0o640); err != nil { t.Fatalf("write json: %v", err) } - if err := minifyJSON(path); err != nil { + if changed, err := minifyJSON(path); err != nil { t.Fatalf("minifyJSON: %v", err) + } else if !changed { + t.Fatalf("expected JSON to be minified") } 
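To make the intent of the normalizeConfigFile change concrete: Proxmox section-style configs encode structure in line order and indentation, so the old strip-comments-and-sort pass could corrupt them, while dropping CR bytes is always safe. A minimal illustration (the stanza mirrors the one used in TestPrefilterSkipsStructuredConfigs further down; the byte values are illustrative):

original := []byte("datastore: Test\n\tpath /mnt/test\n\tcomment Test DS\r\n")

// Old behaviour (removed): trim each line, drop comments/blank lines and sort,
// yielding "comment Test DS", "datastore: Test", "path /mnt/test" and destroying
// the stanza. New behaviour: only CR bytes are removed, all structure is kept.
normalized := bytes.ReplaceAll(original, []byte("\r"), nil)
_ = normalized // "datastore: Test\n\tpath /mnt/test\n\tcomment Test DS\n"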
roundTrip, _ := os.ReadFile(path) var decoded map[string]int diff --git a/internal/backup/optimizations_structured_test.go b/internal/backup/optimizations_structured_test.go new file mode 100644 index 0000000..5d20129 --- /dev/null +++ b/internal/backup/optimizations_structured_test.go @@ -0,0 +1,77 @@ +package backup + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestPrefilterSkipsStructuredConfigs(t *testing.T) { + tmp := t.TempDir() + + // Create structured config (should be skipped) + pbsDir := filepath.Join(tmp, "etc", "proxmox-backup") + if err := os.MkdirAll(pbsDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + + pbsCfg := filepath.Join(pbsDir, "datastore.cfg") + pbsContent := "datastore: Test\n\tpath /mnt/test\n\tcomment Test DS\n" + if err := os.WriteFile(pbsCfg, []byte(pbsContent), 0o640); err != nil { + t.Fatalf("write pbs config: %v", err) + } + + // Create normal config with CRLF (should be normalized) + normalCfg := filepath.Join(tmp, "etc", "normal.cfg") + normalContent := "option1\r\noption2\r\n" + if err := os.WriteFile(normalCfg, []byte(normalContent), 0o640); err != nil { + t.Fatalf("write normal config: %v", err) + } + + // Create log file with CRLF (should be normalized) + logDir := filepath.Join(tmp, "var", "log") + if err := os.MkdirAll(logDir, 0o755); err != nil { + t.Fatalf("mkdir log: %v", err) + } + logFile := filepath.Join(logDir, "test.log") + logContent := "line1\r\nline2\r\n" + if err := os.WriteFile(logFile, []byte(logContent), 0o640); err != nil { + t.Fatalf("write log: %v", err) + } + + // Run prefilter + logger := logging.New(types.LogLevelError, false) + if err := prefilterFiles(context.Background(), logger, tmp, 8*1024*1024); err != nil { + t.Fatalf("prefilterFiles: %v", err) + } + + // Verify PBS config unchanged (TABs preserved) + pbsAfter, _ := os.ReadFile(pbsCfg) + if string(pbsAfter) != pbsContent { + t.Fatalf("PBS config was modified!\nExpected: %q\nGot: %q", pbsContent, string(pbsAfter)) + } + if !strings.Contains(string(pbsAfter), "\t") { + t.Fatalf("PBS config lost TAB indentation") + } + + // Verify normal config normalized (CRLF removed) + normalAfter, _ := os.ReadFile(normalCfg) + if strings.Contains(string(normalAfter), "\r") { + t.Fatalf("Normal config still has CRLF: %q", normalAfter) + } + expectedNormal := strings.ReplaceAll(normalContent, "\r", "") + if string(normalAfter) != expectedNormal { + t.Fatalf("Normal config not normalized correctly\nExpected: %q\nGot: %q", expectedNormal, string(normalAfter)) + } + + // Verify log normalized (CRLF removed) + logAfter, _ := os.ReadFile(logFile) + if strings.Contains(string(logAfter), "\r") { + t.Fatalf("Log file still has CRLF: %q", logAfter) + } +} diff --git a/internal/backup/optimizations_test.go b/internal/backup/optimizations_test.go index b3ae733..1cd7e0c 100644 --- a/internal/backup/optimizations_test.go +++ b/internal/backup/optimizations_test.go @@ -40,7 +40,8 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { dupB := mustWriteFile(filepath.Join("dup", "two.txt"), "identical data") logFile := mustWriteFile(filepath.Join("logs", "app.log"), "line one\r\nline two\r\n") - confFile := mustWriteFile(filepath.Join("conf", "settings.conf"), "# comment\nkey=value\n\n;ignored\nalpha=beta\n") + confOriginal := "# comment\nkey=value\n\n;ignored\nalpha=beta\n" + confFile := mustWriteFile(filepath.Join("conf", "settings.conf"), confOriginal) 
jsonFile := mustWriteFile(filepath.Join("meta", "data.json"), "{\n \"a\": 1,\n \"b\": 2\n}\n") chunkTarget := mustWriteFile("chunk.bin", string(bytes.Repeat([]byte("x"), 96))) @@ -75,7 +76,7 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { t.Fatalf("symlink data mismatch, got %q", data) } - // Prefilter should strip CR characters and comments/sort config files. + // Prefilter should strip CR characters and keep config files semantically intact. logContents, err := os.ReadFile(logFile) if err != nil { t.Fatalf("read log file: %v", err) @@ -87,7 +88,7 @@ func TestApplyOptimizationsRunsAllStages(t *testing.T) { if err != nil { t.Fatalf("read config file: %v", err) } - if string(confContents) != "alpha=beta\nkey=value" { + if string(confContents) != confOriginal { t.Fatalf("unexpected config contents: %q", confContents) } jsonContents, err := os.ReadFile(jsonFile) diff --git a/internal/checks/checks.go b/internal/checks/checks.go index 05df098..1a0a59e 100644 --- a/internal/checks/checks.go +++ b/internal/checks/checks.go @@ -7,10 +7,13 @@ import ( "math" "os" "path/filepath" + "strconv" + "strings" "syscall" "time" "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/safefs" ) // createTestFile is a small indirection over os.Create used by permission @@ -26,6 +29,7 @@ var ( osWriteFile = os.WriteFile osSymlink = os.Symlink syncFile = func(f *os.File) error { return f.Sync() } + killFunc = syscall.Kill // tempRootPath is the runtime path used by CheckTempDirectory. // It is a variable to allow tests to use a safe, isolated temporary directory. @@ -61,6 +65,7 @@ type CheckerConfig struct { MinDiskSecondaryGB float64 MinDiskCloudGB float64 SafetyFactor float64 // Multiplier for estimated size (e.g., 1.5 = 50% buffer) + FsIoTimeout time.Duration LockDirPath string LockFilePath string MaxLockAge time.Duration @@ -91,6 +96,9 @@ func (c *CheckerConfig) Validate() error { if c.SafetyFactor < 1.0 { return fmt.Errorf("safety factor must be >= 1.0, got %.2f", c.SafetyFactor) } + if c.FsIoTimeout < 0 { + return fmt.Errorf("filesystem I/O timeout must be >= 0") + } if c.MaxLockAge <= 0 { return fmt.Errorf("max lock age must be positive") } @@ -218,6 +226,48 @@ func (c *Checker) CheckDiskSpace() CheckResult { return result } +type lockFileMetadata struct { + PID int + Host string + Timestamp string +} + +func parseLockFileMetadata(content []byte) lockFileMetadata { + meta := lockFileMetadata{} + for _, line := range strings.Split(string(content), "\n") { + line = strings.TrimSpace(line) + switch { + case strings.HasPrefix(line, "pid="): + if pid, err := strconv.Atoi(strings.TrimPrefix(line, "pid=")); err == nil && pid > 0 { + meta.PID = pid + } + case strings.HasPrefix(line, "host="): + meta.Host = strings.TrimSpace(strings.TrimPrefix(line, "host=")) + case strings.HasPrefix(line, "time="): + meta.Timestamp = strings.TrimSpace(strings.TrimPrefix(line, "time=")) + } + } + return meta +} + +func sameHost(a, b string) bool { + a = strings.ToLower(strings.TrimSpace(a)) + b = strings.ToLower(strings.TrimSpace(b)) + if a == "" || b == "" { + return false + } + if a == b { + return true + } + short := func(s string) string { + if idx := strings.IndexByte(s, '.'); idx > 0 { + return s[:idx] + } + return s + } + return short(a) == short(b) +} + // CheckLockFile checks for stale lock files and creates a new lock func (c *Checker) CheckLockFile() CheckResult { result := CheckResult{ @@ -231,18 +281,72 @@ func (c *Checker) CheckLockFile() CheckResult { } 
c.logger.Debug("Lock file path: %s", lockPath) + info, statErr := osStat(lockPath) + if statErr != nil && !os.IsNotExist(statErr) { + result.Error = fmt.Errorf("failed to stat lock file: %w", statErr) + result.Message = result.Error.Error() + return result + } + // Check if lock file exists - if _, err := osStat(lockPath); err == nil { - // Lock file exists, check its age - info, err := osStat(lockPath) - if err != nil { - result.Error = fmt.Errorf("failed to stat lock file: %w", err) - result.Message = result.Error.Error() - return result + if statErr == nil { + age := time.Since(info.ModTime()) + + formatInProgress := func(age time.Duration, meta lockFileMetadata) string { + parts := []string{fmt.Sprintf("lock age: %v", age)} + if meta.PID > 0 { + parts = append(parts, fmt.Sprintf("pid=%d", meta.PID)) + } + if meta.Host != "" { + parts = append(parts, fmt.Sprintf("host=%s", meta.Host)) + } + if meta.Timestamp != "" { + parts = append(parts, fmt.Sprintf("time=%s", meta.Timestamp)) + } + return "Another backup is in progress (" + strings.Join(parts, ", ") + ")" } - age := time.Since(info.ModTime()) - if age > c.config.MaxLockAge { + var meta lockFileMetadata + if content, rerr := os.ReadFile(lockPath); rerr == nil { + meta = parseLockFileMetadata(content) + } else { + c.logger.Debug("Failed to read lock file %s: %v", lockPath, rerr) + } + + hostname, _ := os.Hostname() + if meta.PID > 0 && sameHost(meta.Host, hostname) { + // Only perform PID liveness checks when the lock host matches the current host. + // This avoids false positives/negatives when the lock file resides on shared storage. + killErr := killFunc(meta.PID, 0) + if killErr == nil || errors.Is(killErr, syscall.EPERM) { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) + return result + } + if errors.Is(killErr, syscall.ESRCH) { + c.logger.Warning("Removing stale lock file (pid %d not running, age: %v)", meta.PID, age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() + return result + } + } else { + // Unexpected error: fall back to age-based detection. 
+ c.logger.Debug("Lock file liveness check failed (pid=%d): %v", meta.PID, killErr) + if age > c.config.MaxLockAge { + c.logger.Warning("Removing stale lock file (age: %v)", age) + if err := osRemove(lockPath); err != nil { + result.Error = fmt.Errorf("failed to remove stale lock: %w", err) + result.Message = result.Error.Error() + return result + } + } else { + result.Message = formatInProgress(age, meta) + c.logger.Error("%s", result.Message) + return result + } + } + } else if age > c.config.MaxLockAge { // Stale lock file, remove it c.logger.Warning("Removing stale lock file (age: %v)", age) if err := osRemove(lockPath); err != nil { @@ -251,7 +355,7 @@ func (c *Checker) CheckLockFile() CheckResult { return result } } else { - result.Message = fmt.Sprintf("Another backup is in progress (lock age: %v)", age) + result.Message = formatInProgress(age, meta) c.logger.Error("%s", result.Message) return result } @@ -554,6 +658,7 @@ func GetDefaultCheckerConfig(backupPath, logPath, lockDir string) *CheckerConfig MinDiskSecondaryGB: 10.0, MinDiskCloudGB: 10.0, SafetyFactor: 1.5, // 50% buffer over estimated size + FsIoTimeout: 30 * time.Second, LockDirPath: lockDir, LockFilePath: filepath.Join(lockDir, ".backup.lock"), MaxLockAge: 2 * time.Hour, @@ -589,7 +694,7 @@ func (c *Checker) CheckDiskSpaceForEstimate(estimatedSizeGB float64) CheckResult } requiredGB := math.Max(entry.min, estimatedSizeGB*c.config.SafetyFactor) - availableGB, err := diskSpaceGB(entry.path) + availableGB, err := c.diskSpaceGB(entry.path) if err != nil { errMsg := fmt.Sprintf("%s disk space check failed (%s): %v", entry.label, entry.path, err) wrappedErr := fmt.Errorf("%s disk space check failed (%s): %w", entry.label, entry.path, err) @@ -632,7 +737,7 @@ func (c *Checker) CheckDiskSpaceForEstimate(estimatedSizeGB float64) CheckResult } func (c *Checker) checkSingleDisk(label, path string, minGB float64) error { - availableGB, err := diskSpaceGB(path) + availableGB, err := c.diskSpaceGB(path) if err != nil { return fmt.Errorf("%s disk space check failed (%s): %w", label, path, err) } @@ -644,10 +749,14 @@ func (c *Checker) checkSingleDisk(label, path string, minGB float64) error { return nil } -func diskSpaceGB(path string) (float64, error) { - var stat syscall.Statfs_t - if err := syscall.Statfs(path, &stat); err != nil { +func (c *Checker) diskSpaceGB(path string) (float64, error) { + timeout := time.Duration(0) + if c != nil && c.config != nil { + timeout = c.config.FsIoTimeout + } + stat, err := safefs.Statfs(context.Background(), path, timeout) + if err != nil { return 0, err } - return float64(stat.Bavail*uint64(stat.Bsize)) / (1024 * 1024 * 1024), nil + return float64(stat.Bavail) * float64(stat.Bsize) / (1024 * 1024 * 1024), nil } diff --git a/internal/checks/checks_test.go b/internal/checks/checks_test.go index 5f60d0d..1dbc6ff 100644 --- a/internal/checks/checks_test.go +++ b/internal/checks/checks_test.go @@ -149,6 +149,49 @@ func TestCheckLockFileStaleLock(t *testing.T) { checker.ReleaseLock() } +func TestCheckLockFile_RemovesLockWhenProcessIsGone(t *testing.T) { + logger := logging.New(types.LogLevelInfo, false) + logger.SetOutput(io.Discard) + + tmpDir := t.TempDir() + lockPath := filepath.Join(tmpDir, ".backup.lock") + + hostname, _ := os.Hostname() + lockContent := fmt.Sprintf("pid=%d\nhost=%s\ntime=%s\n", 999999, hostname, time.Now().Format(time.RFC3339)) + if err := os.WriteFile(lockPath, []byte(lockContent), 0o640); err != nil { + t.Fatalf("write lock file: %v", err) + } + + config := 
GetDefaultCheckerConfig(tmpDir, tmpDir, tmpDir) + config.LockFilePath = lockPath + config.MaxLockAge = 24 * time.Hour + config.DryRun = false + + origKill := killFunc + t.Cleanup(func() { killFunc = origKill }) + killFunc = func(pid int, sig syscall.Signal) error { + if pid == 999999 && sig == 0 { + return syscall.ESRCH + } + return origKill(pid, sig) + } + + checker := NewChecker(logger, config) + result := checker.CheckLockFile() + if !result.Passed { + t.Fatalf("CheckLockFile should succeed after removing stale lock: %s", result.Message) + } + t.Cleanup(func() { _ = checker.ReleaseLock() }) + + content, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("read lock file: %v", err) + } + if !strings.Contains(string(content), fmt.Sprintf("pid=%d\n", os.Getpid())) { + t.Fatalf("lock file not recreated with current pid; got:\n%s", string(content)) + } +} + func TestCheckLockFile_WritesExpectedContent(t *testing.T) { logger := logging.New(types.LogLevelInfo, false) logger.SetOutput(io.Discard) @@ -1000,7 +1043,7 @@ func TestRunAllChecks_FailsOnLockFile(t *testing.T) { } } -func TestCheckLockFile_StatFailsAfterExistenceCheck(t *testing.T) { +func TestCheckLockFile_StatFails(t *testing.T) { logger := logging.New(types.LogLevelInfo, false) logger.SetOutput(io.Discard) @@ -1016,13 +1059,9 @@ func TestCheckLockFile_StatFailsAfterExistenceCheck(t *testing.T) { origStat := osStat t.Cleanup(func() { osStat = origStat }) - calls := 0 osStat = func(name string) (os.FileInfo, error) { if name == lockPath { - calls++ - if calls == 2 { - return nil, &os.PathError{Op: "stat", Path: name, Err: syscall.EIO} - } + return nil, &os.PathError{Op: "stat", Path: name, Err: syscall.EIO} } return origStat(name) } @@ -1597,7 +1636,13 @@ func TestCheckDiskSpaceForEstimate_WarnsOnNonCriticalErrorsAndInsufficientSpace( } func TestDiskSpaceGB_ErrorsOnMissingPath(t *testing.T) { - if _, err := diskSpaceGB("/nonexistent/path"); err == nil { + logger := logging.New(types.LogLevelInfo, false) + logger.SetOutput(io.Discard) + + cfg := GetDefaultCheckerConfig(t.TempDir(), t.TempDir(), t.TempDir()) + checker := NewChecker(logger, cfg) + + if _, err := checker.diskSpaceGB("/nonexistent/path"); err == nil { t.Fatalf("expected diskSpaceGB to error on missing path") } } diff --git a/internal/config/config.go b/internal/config/config.go index 8c4cf28..1bea4f2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -197,32 +197,30 @@ type Config struct { PVEBackupIncludePattern string BackupCephConfig bool CephConfigPath string + PveshTimeoutSeconds int + FsIoTimeoutSeconds int // PBS-specific collection options - BackupDatastoreConfigs bool - BackupPBSS3Endpoints bool - BackupPBSNodeConfig bool - BackupPBSAcmeAccounts bool - BackupPBSAcmePlugins bool - BackupPBSMetricServers bool - BackupPBSTrafficControl bool - BackupUserConfigs bool - BackupRemoteConfigs bool - BackupSyncJobs bool - BackupVerificationJobs bool - BackupTapeConfigs bool - BackupPBSNetworkConfig bool - BackupPruneSchedules bool - BackupPxarFiles bool - PxarDatastoreConcurrency int - PxarIntraConcurrency int - PxarScanFanoutLevel int - PxarScanMaxRoots int - PxarStopOnCap bool - PxarEnumWorkers int - PxarEnumBudgetMs int - PxarFileIncludePatterns []string - PxarFileExcludePatterns []string + BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool + BackupPBSNotifications bool + 
BackupPBSNotificationsPriv bool + BackupUserConfigs bool + BackupRemoteConfigs bool + BackupSyncJobs bool + BackupVerificationJobs bool + BackupTapeConfigs bool + BackupPBSNetworkConfig bool + BackupPruneSchedules bool + BackupPxarFiles bool + PxarDatastoreConcurrency int + PxarFileIncludePatterns []string + PxarFileExcludePatterns []string // System collection options BackupNetworkConfigs bool @@ -652,6 +650,14 @@ func (c *Config) parsePVESettings() error { c.BackupPVESchedules = c.getBool("BACKUP_PVE_SCHEDULES", true) c.BackupPVEReplication = c.getBool("BACKUP_PVE_REPLICATION", true) c.BackupPVEBackupFiles = c.getBool("BACKUP_PVE_BACKUP_FILES", true) + c.PveshTimeoutSeconds = c.getInt("PVESH_TIMEOUT", 15) + if c.PveshTimeoutSeconds < 0 { + c.PveshTimeoutSeconds = 15 + } + c.FsIoTimeoutSeconds = c.getInt("FS_IO_TIMEOUT", 30) + if c.FsIoTimeoutSeconds < 0 { + c.FsIoTimeoutSeconds = 30 + } c.BackupSmallPVEBackups = c.getBool("BACKUP_SMALL_PVE_BACKUPS", false) if rawSize := strings.TrimSpace(c.getString("MAX_PVE_BACKUP_SIZE", "")); rawSize != "" { sizeBytes, err := parseSizeToBytes(rawSize) @@ -680,6 +686,8 @@ func (c *Config) parsePBSSettings() { c.BackupPBSAcmePlugins = c.getBool("BACKUP_PBS_ACME_PLUGINS", true) c.BackupPBSMetricServers = c.getBool("BACKUP_PBS_METRIC_SERVERS", true) c.BackupPBSTrafficControl = c.getBool("BACKUP_PBS_TRAFFIC_CONTROL", true) + c.BackupPBSNotifications = c.getBool("BACKUP_PBS_NOTIFICATIONS", true) + c.BackupPBSNotificationsPriv = c.getBool("BACKUP_PBS_NOTIFICATIONS_PRIV", c.BackupPBSNotifications) c.BackupUserConfigs = c.getBool("BACKUP_USER_CONFIGS", true) c.BackupRemoteConfigs = c.getBoolWithFallback([]string{"BACKUP_REMOTE_CONFIGS", "BACKUP_REMOTE_CFG"}, true) c.BackupSyncJobs = c.getBool("BACKUP_SYNC_JOBS", true) @@ -690,12 +698,6 @@ c.BackupPruneSchedules = c.getBool("BACKUP_PRUNE_SCHEDULES", true) c.BackupPxarFiles = c.getBoolWithFallback([]string{"PXAR_SCAN_ENABLE", "BACKUP_PXAR_FILES"}, true) c.PxarDatastoreConcurrency = c.getInt("PXAR_SCAN_DS_CONCURRENCY", 3) - c.PxarIntraConcurrency = c.getInt("PXAR_SCAN_INTRA_CONCURRENCY", 4) - c.PxarScanFanoutLevel = c.getInt("PXAR_SCAN_FANOUT_LEVEL", 2) - c.PxarScanMaxRoots = c.getInt("PXAR_SCAN_MAX_ROOTS", 2048) - c.PxarStopOnCap = c.getBool("PXAR_STOP_ON_CAP", false) - c.PxarEnumWorkers = c.getInt("PXAR_ENUM_READDIR_WORKERS", 4) - c.PxarEnumBudgetMs = c.getInt("PXAR_ENUM_BUDGET_MS", 0) c.PxarFileIncludePatterns = normalizeList(c.getStringSliceWithFallback([]string{"PXAR_FILE_INCLUDE_PATTERN", "PXAR_INCLUDE_PATTERN"}, nil)) c.PxarFileExcludePatterns = normalizeList(c.getStringSlice("PXAR_FILE_EXCLUDE_PATTERN", nil)) } diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 7a7cd71..9c3ec39 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -281,6 +281,8 @@ BACKUP_PVE_JOBS=true BACKUP_PVE_SCHEDULES=true BACKUP_PVE_REPLICATION=true BACKUP_PVE_BACKUP_FILES=true +PVESH_TIMEOUT=15 # Timeout (sec) for each pvesh command (0 = disabled) +FS_IO_TIMEOUT=30 # Timeout (sec) for filesystem I/O on storages (stat/readdir/statfs). Useful for unreachable network mounts (0 = disabled)
BACKUP_SMALL_PVE_BACKUPS=false MAX_PVE_BACKUP_SIZE=100M PVE_BACKUP_INCLUDE_PATTERN= @@ -296,6 +298,8 @@ BACKUP_PBS_ACME_ACCOUNTS=true # acme/accounts.cfg BACKUP_PBS_ACME_PLUGINS=true # acme/plugins.cfg BACKUP_PBS_METRIC_SERVERS=true # metricserver.cfg BACKUP_PBS_TRAFFIC_CONTROL=true # traffic-control.cfg +BACKUP_PBS_NOTIFICATIONS=true # notifications.cfg (targets/matchers/endpoints) +BACKUP_PBS_NOTIFICATIONS_PRIV=true # notifications-priv.cfg (secrets/credentials for endpoints) BACKUP_USER_CONFIGS=true BACKUP_REMOTE_CONFIGS=true BACKUP_SYNC_JOBS=true @@ -304,15 +308,9 @@ BACKUP_TAPE_CONFIGS=true BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP_NETWORK_CONFIGS (system) BACKUP_PRUNE_SCHEDULES=true PXAR_SCAN_ENABLE=false -PXAR_SCAN_DS_CONCURRENCY=3 # Number of datastores scanned in parallel for PXAR metadata -PXAR_SCAN_INTRA_CONCURRENCY=4 # Worker threads per datastore for PXAR directory/file sampling -PXAR_SCAN_FANOUT_LEVEL=2 # Directory depth for worker fan-out (1=top-level, 2=vm/ct IDs, increase for namespaces) -PXAR_SCAN_MAX_ROOTS=2048 # Maximum worker roots per datastore (limits fan-out enumeration) -PXAR_STOP_ON_CAP=false # Stop enumeration immediately after hitting PXAR_SCAN_MAX_ROOTS -PXAR_ENUM_READDIR_WORKERS=4 # Parallel ReadDir workers per fanout depth -PXAR_ENUM_BUDGET_MS=0 # Optional time budget for enumeration (0=disabled) -PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns to locate PXAR files (default auto *.pxar,*.pxar.*) -PXAR_FILE_EXCLUDE_PATTERN= # Patterns to exclude while sampling files (e.g. *.tmp, *.lock) +PXAR_SCAN_DS_CONCURRENCY=3 # Datastores scanned in parallel for PXAR metadata +PXAR_FILE_INCLUDE_PATTERN= # Space/comma separated patterns to sample archive files (PBS: *.pxar/catalog.pxar*, PVE: vzdump backups). Leave empty to use the defaults. +PXAR_FILE_EXCLUDE_PATTERN= # Patterns to exclude while sampling (applies to both PBS and PVE; e.g. *.tmp, *.lock) # Override collection paths (use only if directories differ from defaults) # Note: $VAR / ${VAR} expansion resolves keys from this file too (no need to export). diff --git a/internal/config/upgrade.go b/internal/config/upgrade.go index 755f12c..d319a4a 100644 --- a/internal/config/upgrade.go +++ b/internal/config/upgrade.go @@ -558,17 +558,3 @@ func findClosingQuoteLine(lines []string, start int) (int, error) { } return 0, fmt.Errorf("closing quote not found") } - -func renderEnvValue(key string, value envValue) []string { - if value.kind == envValueKindBlock { - lines := []string{fmt.Sprintf("%s=\"", key)} - lines = append(lines, value.blockLines...)
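The FS_IO_TIMEOUT value introduced above is consumed through the internal/safefs helpers (ReadDir, Stat, Statfs) used by the bounded samplers and the disk-space check. That package is not included in this diff, so the following is only an assumed sketch of the general shape such a timeout-bounded probe can take; the helper name and error text are made up, and a zero timeout means "no bound", matching FS_IO_TIMEOUT=0.

package safefsdemo

import (
	"context"
	"fmt"
	"os"
	"time"
)

// statWithTimeout runs os.Lstat in a goroutine and stops waiting after the
// timeout, so an unreachable network mount cannot hang the whole backup run.
// The probe goroutine is abandoned if the call never returns; only the wait is bounded.
func statWithTimeout(ctx context.Context, path string, timeout time.Duration) (os.FileInfo, error) {
	if timeout <= 0 {
		return os.Lstat(path) // timeout disabled
	}
	type result struct {
		info os.FileInfo
		err  error
	}
	ch := make(chan result, 1)
	go func() {
		info, err := os.Lstat(path)
		ch <- result{info, err}
	}()
	timer := time.NewTimer(timeout)
	defer timer.Stop()
	select {
	case r := <-ch:
		return r.info, r.err
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-timer.C:
		return nil, fmt.Errorf("stat %s: timed out after %s", path, timeout)
	}
}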
- lines = append(lines, "\"") - return lines - } - line := fmt.Sprintf("%s=%s", key, value.rawValue) - if value.comment != "" { - line += " " + value.comment - } - return []string{line} -} diff --git a/internal/identity/identity_test.go b/internal/identity/identity_test.go index f904228..d7a6354 100644 --- a/internal/identity/identity_test.go +++ b/internal/identity/identity_test.go @@ -844,10 +844,10 @@ func TestAddrAssignRank(t *testing.T) { value int want int }{ - {0, 0}, // permanent - best - {3, 1}, // set by userspace - {2, 2}, // stolen - {1, 3}, // random + {0, 0}, // permanent - best + {3, 1}, // set by userspace + {2, 2}, // stolen + {1, 3}, // random {-1, 4}, // unknown {99, 4}, // unknown } @@ -1324,10 +1324,10 @@ func TestSelectPreferredMACEmpty(t *testing.T) { func TestSelectPreferredMACWithEmptyFields(t *testing.T) { candidates := []macCandidate{ - {Iface: "", MAC: "aa:bb:cc:dd:ee:ff"}, // empty iface - {Iface: "eth0", MAC: ""}, // empty mac - {Iface: " ", MAC: " "}, // whitespace only - {Iface: "eth1", MAC: "00:11:22:33:44:55"}, // valid + {Iface: "", MAC: "aa:bb:cc:dd:ee:ff"}, // empty iface + {Iface: "eth0", MAC: ""}, // empty mac + {Iface: " ", MAC: " "}, // whitespace only + {Iface: "eth1", MAC: "00:11:22:33:44:55"}, // valid } mac, iface := selectPreferredMAC(candidates) diff --git a/internal/orchestrator/backup_safety_test.go b/internal/orchestrator/backup_safety_test.go index 80c8cf5..2529e2e 100644 --- a/internal/orchestrator/backup_safety_test.go +++ b/internal/orchestrator/backup_safety_test.go @@ -1938,10 +1938,10 @@ func TestRestoreSafetyBackup_ComplexStructure(t *testing.T) { // Add files in various directories files := map[string]string{ - "etc/config.conf": "config content", - "etc/app/app.conf": "app config", - "var/lib/app/data": "app data", - "root.txt": "root file", + "etc/config.conf": "config content", + "etc/app/app.conf": "app config", + "var/lib/app/data": "app data", + "root.txt": "root file", } for name, content := range files { hdr := &tar.Header{Name: name, Mode: 0644, Size: int64(len(content))} diff --git a/internal/orchestrator/bundle_test.go b/internal/orchestrator/bundle_test.go index 6a9305c..9462b4b 100644 --- a/internal/orchestrator/bundle_test.go +++ b/internal/orchestrator/bundle_test.go @@ -90,11 +90,12 @@ func TestCreateBundle_CreatesValidTarArchive(t *testing.T) { } expectedContent := testData[""] - if header.Name == "backup.tar.sha256" { + switch header.Name { + case "backup.tar.sha256": expectedContent = testData[".sha256"] - } else if header.Name == "backup.tar.metadata" { + case "backup.tar.metadata": expectedContent = testData[".metadata"] - } else if header.Name == "backup.tar.metadata.sha256" { + case "backup.tar.metadata.sha256": expectedContent = testData[".metadata.sha256"] } diff --git a/internal/orchestrator/categories.go b/internal/orchestrator/categories.go index 053bb83..56483d8 100644 --- a/internal/orchestrator/categories.go +++ b/internal/orchestrator/categories.go @@ -178,6 +178,12 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/acme/plugins.cfg", "./etc/proxmox-backup/metricserver.cfg", "./etc/proxmox-backup/traffic-control.cfg", + "./var/lib/proxsave-info/commands/pbs/node_config.json", + "./var/lib/proxsave-info/commands/pbs/acme_accounts.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugins.json", + "./var/lib/proxsave-info/commands/pbs/acme_account_*_info.json", + "./var/lib/proxsave-info/commands/pbs/acme_plugin_*_config.json", + "./var/lib/proxsave-info/commands/pbs/traffic_control.json", 
}, }, { @@ -188,6 +194,11 @@ func GetAllCategories() []Category { Paths: []string{ "./etc/proxmox-backup/datastore.cfg", "./etc/proxmox-backup/s3.cfg", + "./var/lib/proxsave-info/commands/pbs/datastore_list.json", + "./var/lib/proxsave-info/commands/pbs/datastore_*_status.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoints.json", + "./var/lib/proxsave-info/commands/pbs/s3_endpoint_*_buckets.json", + "./var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", }, }, { @@ -208,6 +219,10 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/sync.cfg", "./etc/proxmox-backup/verification.cfg", "./etc/proxmox-backup/prune.cfg", + "./var/lib/proxsave-info/commands/pbs/sync_jobs.json", + "./var/lib/proxsave-info/commands/pbs/verification_jobs.json", + "./var/lib/proxsave-info/commands/pbs/prune_jobs.json", + "./var/lib/proxsave-info/commands/pbs/gc_jobs.json", }, }, { @@ -217,6 +232,7 @@ func GetAllCategories() []Category { Type: CategoryTypePBS, Paths: []string{ "./etc/proxmox-backup/remote.cfg", + "./var/lib/proxsave-info/commands/pbs/remote_list.json", }, }, { @@ -227,6 +243,9 @@ func GetAllCategories() []Category { Paths: []string{ "./etc/proxmox-backup/notifications.cfg", "./etc/proxmox-backup/notifications-priv.cfg", + "./var/lib/proxsave-info/commands/pbs/notification_targets.json", + "./var/lib/proxsave-info/commands/pbs/notification_matchers.json", + "./var/lib/proxsave-info/commands/pbs/notification_endpoints_*.json", }, }, { @@ -242,6 +261,11 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/shadow.json", "./etc/proxmox-backup/token.shadow", "./etc/proxmox-backup/tfa.json", + "./var/lib/proxsave-info/commands/pbs/user_list.json", + "./var/lib/proxsave-info/commands/pbs/realms_ldap.json", + "./var/lib/proxsave-info/commands/pbs/realms_ad.json", + "./var/lib/proxsave-info/commands/pbs/realms_openid.json", + "./var/lib/proxsave-info/commands/pbs/acl_list.json", }, }, { @@ -254,6 +278,9 @@ func GetAllCategories() []Category { "./etc/proxmox-backup/tape-job.cfg", "./etc/proxmox-backup/media-pool.cfg", "./etc/proxmox-backup/tape-encryption-keys.json", + "./var/lib/proxsave-info/commands/pbs/tape_drives.json", + "./var/lib/proxsave-info/commands/pbs/tape_changers.json", + "./var/lib/proxsave-info/commands/pbs/tape_pools.json", }, }, @@ -443,14 +470,17 @@ func GetCategoriesForSystem(systemType string) []Category { all := GetAllCategories() var categories []Category - for _, cat := range all { - if systemType == "pve" { - // PVE system: include PVE and common categories + switch systemType { + case "pve": + // PVE system: include PVE and common categories + for _, cat := range all { if cat.Type == CategoryTypePVE || cat.Type == CategoryTypeCommon { categories = append(categories, cat) } - } else if systemType == "pbs" { - // PBS system: include PBS and common categories + } + case "pbs": + // PBS system: include PBS and common categories + for _, cat := range all { if cat.Type == CategoryTypePBS || cat.Type == CategoryTypeCommon { categories = append(categories, cat) } @@ -539,14 +569,15 @@ func GetStorageModeCategories(systemType string) []Category { all := GetAllCategories() var categories []Category - if systemType == "pve" { + switch systemType { + case "pve": // PVE: cluster + storage + jobs + zfs + filesystem + storage stack for _, cat := range all { if cat.ID == "pve_cluster" || cat.ID == "storage_pve" || cat.ID == "pve_jobs" || cat.ID == "zfs" || cat.ID == "filesystem" || cat.ID == "storage_stack" { categories = append(categories, cat) } } - } else if 
systemType == "pbs" { + case "pbs": // PBS: config export + datastore + maintenance + jobs + remotes + zfs + filesystem + storage stack for _, cat := range all { if cat.ID == "pbs_config" || cat.ID == "datastore_pbs" || cat.ID == "maintenance_pbs" || cat.ID == "pbs_jobs" || cat.ID == "pbs_remotes" || cat.ID == "zfs" || cat.ID == "filesystem" || cat.ID == "storage_stack" { diff --git a/internal/orchestrator/compatibility.go b/internal/orchestrator/compatibility.go index 8e541cd..cc2eb3f 100644 --- a/internal/orchestrator/compatibility.go +++ b/internal/orchestrator/compatibility.go @@ -124,11 +124,12 @@ func GetSystemInfo() map[string]string { info["type_name"] = GetSystemTypeString(systemType) // Get version information - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if content, err := compatFS.ReadFile("/etc/pve-release"); err == nil { info["version"] = strings.TrimSpace(string(content)) } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if content, err := compatFS.ReadFile("/etc/proxmox-backup-release"); err == nil { info["version"] = strings.TrimSpace(string(content)) } diff --git a/internal/orchestrator/decrypt.go b/internal/orchestrator/decrypt.go index 92e9e6b..c995530 100644 --- a/internal/orchestrator/decrypt.go +++ b/internal/orchestrator/decrypt.go @@ -80,8 +80,8 @@ func RunDecryptWorkflowWithDeps(ctx context.Context, deps *Deps, version string) done := logging.DebugStart(logger, "decrypt workflow", "version=%s", version) defer func() { done(err) }() - ui := newCLIWorkflowUI(bufio.NewReader(os.Stdin), logger) - return runDecryptWorkflowWithUI(ctx, cfg, logger, version, ui) + ui := newCLIWorkflowUI(bufio.NewReader(os.Stdin), logger) + return runDecryptWorkflowWithUI(ctx, cfg, logger, version, ui) } // RunDecryptWorkflow is the legacy entrypoint that builds default deps. 
@@ -652,7 +652,7 @@ func copyRawArtifactsToWorkdirWithLogger(ctx context.Context, cand *decryptCandi func decryptArchiveWithPrompts(ctx context.Context, reader *bufio.Reader, encryptedPath, outputPath string, logger *logging.Logger) error { ui := newCLIWorkflowUI(reader, logger) displayName := filepath.Base(encryptedPath) - return decryptArchiveWithSecretPrompt(ctx, encryptedPath, outputPath, displayName, logger, ui.PromptDecryptSecret) + return decryptArchiveWithSecretPrompt(ctx, encryptedPath, outputPath, displayName, ui.PromptDecryptSecret) } func parseIdentityInput(input string) ([]age.Identity, error) { diff --git a/internal/orchestrator/decrypt_test.go b/internal/orchestrator/decrypt_test.go index d663df7..59be13c 100644 --- a/internal/orchestrator/decrypt_test.go +++ b/internal/orchestrator/decrypt_test.go @@ -34,19 +34,19 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths []string wantLabel []string }{ - { - name: "all paths enabled", - cfg: &config.Config{ - BackupPath: "/backup/local", - SecondaryEnabled: true, - SecondaryPath: "/backup/secondary", - CloudEnabled: true, - CloudRemote: "/backup/cloud", - }, - wantCount: 3, - wantPaths: []string{"/backup/local", "/backup/secondary", "/backup/cloud"}, - wantLabel: []string{"Local backups", "Secondary backups", "Cloud backups"}, + { + name: "all paths enabled", + cfg: &config.Config{ + BackupPath: "/backup/local", + SecondaryEnabled: true, + SecondaryPath: "/backup/secondary", + CloudEnabled: true, + CloudRemote: "/backup/cloud", }, + wantCount: 3, + wantPaths: []string{"/backup/local", "/backup/secondary", "/backup/cloud"}, + wantLabel: []string{"Local backups", "Secondary backups", "Cloud backups"}, + }, { name: "only local path", cfg: &config.Config{ @@ -91,28 +91,28 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths: []string{"/backup/local"}, wantLabel: []string{"Local backups"}, }, - { - name: "cloud with rclone remote included", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "gdrive:backups", // rclone remote - }, - wantCount: 2, - wantPaths: []string{"/backup/local", "gdrive:backups"}, - wantLabel: []string{"Local backups", "Cloud backups (rclone)"}, + { + name: "cloud with rclone remote included", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: "gdrive:backups", // rclone remote }, - { - name: "cloud with local absolute path included", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "/mnt/cloud/backups", - }, - wantCount: 2, - wantPaths: []string{"/backup/local", "/mnt/cloud/backups"}, - wantLabel: []string{"Local backups", "Cloud backups"}, + wantCount: 2, + wantPaths: []string{"/backup/local", "gdrive:backups"}, + wantLabel: []string{"Local backups", "Cloud backups (rclone)"}, + }, + { + name: "cloud with local absolute path included", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: "/mnt/cloud/backups", }, + wantCount: 2, + wantPaths: []string{"/backup/local", "/mnt/cloud/backups"}, + wantLabel: []string{"Local backups", "Cloud backups"}, + }, { name: "secondary enabled but path empty", cfg: &config.Config{ @@ -135,17 +135,17 @@ func TestBuildDecryptPathOptions(t *testing.T) { wantPaths: []string{"/backup/local"}, wantLabel: []string{"Local backups"}, }, - { - name: "cloud absolute with colon allowed", - cfg: &config.Config{ - BackupPath: "/backup/local", - CloudEnabled: true, - CloudRemote: "/mnt/backups:foo", - }, - wantCount: 
2, - wantPaths: []string{"/backup/local", "/mnt/backups:foo"}, - wantLabel: []string{"Local backups", "Cloud backups"}, + { + name: "cloud absolute with colon allowed", + cfg: &config.Config{ + BackupPath: "/backup/local", + CloudEnabled: true, + CloudRemote: "/mnt/backups:foo", }, + wantCount: 2, + wantPaths: []string{"/backup/local", "/mnt/backups:foo"}, + wantLabel: []string{"Local backups", "Cloud backups"}, + }, { name: "all paths empty", cfg: &config.Config{}, @@ -2588,7 +2588,7 @@ func TestInspectRcloneMetadataManifest_RcloneFails(t *testing.T) { // copyRawArtifactsToWorkdirWithLogger coverage tests // ===================================== -func TestCopyRawArtifactsToWorkdir_NilContext(t *testing.T) { +func TestCopyRawArtifactsToWorkdir_ContextWorks(t *testing.T) { origFS := restoreFS restoreFS = osFS{} t.Cleanup(func() { restoreFS = origFS }) @@ -2612,8 +2612,7 @@ func TestCopyRawArtifactsToWorkdir_NilContext(t *testing.T) { RawChecksumPath: "", } - // Pass nil context - function should use context.Background() - staged, err := copyRawArtifactsToWorkdirWithLogger(nil, cand, workDir, nil) + staged, err := copyRawArtifactsToWorkdirWithLogger(context.TODO(), cand, workDir, nil) if err != nil { t.Fatalf("copyRawArtifactsToWorkdirWithLogger error: %v", err) } diff --git a/internal/orchestrator/decrypt_tui.go b/internal/orchestrator/decrypt_tui.go index b0f23a8..655f7f5 100644 --- a/internal/orchestrator/decrypt_tui.go +++ b/internal/orchestrator/decrypt_tui.go @@ -340,12 +340,21 @@ func preparePlainBundleTUI(ctx context.Context, cand *decryptCandidate, version func decryptArchiveWithTUIPrompts(ctx context.Context, encryptedPath, outputPath, displayName, configPath, buildSig string, logger *logging.Logger) error { var promptError string + if ctx == nil { + ctx = context.Background() + } for { + if err := ctx.Err(); err != nil { + return err + } identities, err := promptDecryptIdentity(displayName, configPath, buildSig, promptError) if err != nil { return err } + if err := ctx.Err(); err != nil { + return err + } if err := decryptWithIdentity(encryptedPath, outputPath, identities...); err != nil { var noMatch *age.NoIdentityMatchError if errors.Is(err, age.ErrIncorrectIdentity) || errors.As(err, &noMatch) { diff --git a/internal/orchestrator/decrypt_tui_simulation_test.go b/internal/orchestrator/decrypt_tui_simulation_test.go index d36f36f..9a65f1c 100644 --- a/internal/orchestrator/decrypt_tui_simulation_test.go +++ b/internal/orchestrator/decrypt_tui_simulation_test.go @@ -34,4 +34,3 @@ func TestPromptDecryptIdentity_PassphraseReturnsIdentity(t *testing.T) { t.Fatalf("expected at least one identity") } } - diff --git a/internal/orchestrator/decrypt_workflow_ui.go b/internal/orchestrator/decrypt_workflow_ui.go index 24d0377..a57d45a 100644 --- a/internal/orchestrator/decrypt_workflow_ui.go +++ b/internal/orchestrator/decrypt_workflow_ui.go @@ -137,7 +137,7 @@ func ensureWritablePathWithUI(ctx context.Context, ui DecryptWorkflowUI, targetP } } -func decryptArchiveWithSecretPrompt(ctx context.Context, encryptedPath, outputPath, displayName string, logger *logging.Logger, prompt func(ctx context.Context, displayName, previousError string) (string, error)) error { +func decryptArchiveWithSecretPrompt(ctx context.Context, encryptedPath, outputPath, displayName string, prompt func(ctx context.Context, displayName, previousError string) (string, error)) error { promptError := "" for { secret, err := prompt(ctx, displayName, promptError) @@ -246,7 +246,7 @@ func preparePlainBundleWithUI(ctx 
context.Context, cand *decryptCandidate, versi if strings.TrimSpace(displayName) == "" { displayName = filepath.Base(manifestCopy.ArchivePath) } - if err := decryptArchiveWithSecretPrompt(ctx, staged.ArchivePath, plainArchivePath, displayName, logger, ui.PromptDecryptSecret); err != nil { + if err := decryptArchiveWithSecretPrompt(ctx, staged.ArchivePath, plainArchivePath, displayName, ui.PromptDecryptSecret); err != nil { cleanup() return nil, err } diff --git a/internal/orchestrator/deps.go b/internal/orchestrator/deps.go index cb64194..648e20b 100644 --- a/internal/orchestrator/deps.go +++ b/internal/orchestrator/deps.go @@ -197,7 +197,6 @@ func NewWithDeps(deps Deps) *Orchestrator { base.Time = deps.Time } if deps.Config != nil { - base.Config = deps.Config base.DryRun = deps.Config.DryRun } diff --git a/internal/orchestrator/directory_recreation.go b/internal/orchestrator/directory_recreation.go index 19dc9f3..611aaff 100644 --- a/internal/orchestrator/directory_recreation.go +++ b/internal/orchestrator/directory_recreation.go @@ -294,7 +294,7 @@ func createPBSDatastoreStructure(basePath, datastoreName string, logger *logging // If the datastore already contains chunk/index data, avoid any modifications to prevent touching real backup data. // We only validate and report issues. if hasData { - if warn := validatePBSDatastoreReadOnly(basePath, logger); warn != "" { + if warn := validatePBSDatastoreReadOnly(basePath); warn != "" { logger.Warning("PBS datastore preflight: %s", warn) } logger.Info("PBS datastore preflight: datastore %s appears to contain data; skipping directory/permission changes to avoid risking datastore contents", datastoreName) @@ -371,7 +371,7 @@ func createPBSDatastoreStructure(basePath, datastoreName string, logger *logging return changed, nil } -func validatePBSDatastoreReadOnly(datastorePath string, logger *logging.Logger) string { +func validatePBSDatastoreReadOnly(datastorePath string) string { if datastorePath == "" { return "datastore path is empty" } @@ -843,15 +843,16 @@ func isIgnorableOwnershipError(err error) bool { func RecreateDirectoriesFromConfig(systemType SystemType, logger *logging.Logger) error { logger.Info("Recreating directory structures from configuration...") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if err := RecreateStorageDirectories(logger); err != nil { return fmt.Errorf("recreate PVE storage directories: %w", err) } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if err := RecreateDatastoreDirectories(logger); err != nil { return fmt.Errorf("recreate PBS datastore directories: %w", err) } - } else { + default: logger.Debug("Unknown system type, skipping directory recreation") } diff --git a/internal/orchestrator/fs_atomic.go b/internal/orchestrator/fs_atomic.go index 476c845..879f0ce 100644 --- a/internal/orchestrator/fs_atomic.go +++ b/internal/orchestrator/fs_atomic.go @@ -75,7 +75,7 @@ func ensureDirExistsWithInheritedMeta(dir string) error { return nil } return fmt.Errorf("path exists but is not a directory: %s", dir) - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("stat %s: %w", dir, err) } @@ -123,7 +123,7 @@ func ensureDirExistsWithInheritedMeta(dir string) error { continue } return fmt.Errorf("path exists but is not a directory: %s", p) - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("stat %s: %w", p, err) } diff --git 
a/internal/orchestrator/mount_guard.go b/internal/orchestrator/mount_guard.go index bc352f9..037811d 100644 --- a/internal/orchestrator/mount_guard.go +++ b/internal/orchestrator/mount_guard.go @@ -220,6 +220,13 @@ func maybeApplyPBSDatastoreMountGuards(ctx context.Context, logger *logging.Logg } func guardMountPoint(ctx context.Context, guardTarget string) error { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + target := filepath.Clean(strings.TrimSpace(guardTarget)) if target == "" || target == "." || target == string(os.PathSeparator) { return fmt.Errorf("invalid guard target: %q", guardTarget) diff --git a/internal/orchestrator/network_apply_workflow_ui.go b/internal/orchestrator/network_apply_workflow_ui.go index 14e931b..b150c89 100644 --- a/internal/orchestrator/network_apply_workflow_ui.go +++ b/internal/orchestrator/network_apply_workflow_ui.go @@ -267,7 +267,7 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l if strings.TrimSpace(iface) != "" { if cur, err := currentNetworkEndpoint(ctx, iface, 2*time.Second); err == nil { - if tgt, err := targetNetworkEndpointFromConfig(logger, iface); err == nil { + if tgt, err := targetNetworkEndpointFromConfig(iface); err == nil { logger.Info("Network plan: %s -> %s", cur.summary(), tgt.summary()) } } @@ -275,7 +275,7 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l if diagnosticsDir != "" { logging.DebugStep(logger, "network safe apply (ui)", "Write network plan (current -> target)") - if planText, err := buildNetworkPlanReport(ctx, logger, iface, source, 2*time.Second); err != nil { + if planText, err := buildNetworkPlanReport(ctx, iface, source, 2*time.Second); err != nil { logger.Debug("Network plan build failed: %v", err) } else if strings.TrimSpace(planText) != "" { if path, err := writeNetworkTextReportFile(diagnosticsDir, "plan.txt", planText+"\n"); err != nil { diff --git a/internal/orchestrator/network_health.go b/internal/orchestrator/network_health.go index 2c7faed..8b583f5 100644 --- a/internal/orchestrator/network_health.go +++ b/internal/orchestrator/network_health.go @@ -89,17 +89,6 @@ type networkHealthOptions struct { LocalPortChecks []tcpPortCheck } -func defaultNetworkHealthOptions() networkHealthOptions { - return networkHealthOptions{ - SystemType: SystemTypeUnknown, - Logger: nil, - CommandTimeout: 3 * time.Second, - EnableGatewayPing: true, - ForceSSHRouteCheck: false, - EnableDNSResolve: true, - } -} - type tcpPortCheck struct { Name string Address string diff --git a/internal/orchestrator/network_plan.go b/internal/orchestrator/network_plan.go index 7c07711..11c1eb9 100644 --- a/internal/orchestrator/network_plan.go +++ b/internal/orchestrator/network_plan.go @@ -7,8 +7,6 @@ import ( "sort" "strings" "time" - - "github.com/tis24dev/proxsave/internal/logging" ) type networkEndpoint struct { @@ -33,7 +31,7 @@ func (e networkEndpoint) summary() string { return fmt.Sprintf("iface=%s ip=%s gw=%s", iface, addrs, gw) } -func buildNetworkPlanReport(ctx context.Context, logger *logging.Logger, iface, source string, timeout time.Duration) (string, error) { +func buildNetworkPlanReport(ctx context.Context, iface, source string, timeout time.Duration) (string, error) { if strings.TrimSpace(iface) == "" { return fmt.Sprintf("Network plan\n\n- Management interface: n/a\n- Detection source: %s\n", strings.TrimSpace(source)), nil } @@ -42,7 +40,7 @@ func buildNetworkPlanReport(ctx context.Context, logger 
*logging.Logger, iface, } current, _ := currentNetworkEndpoint(ctx, iface, timeout) - target, _ := targetNetworkEndpointFromConfig(logger, iface) + target, _ := targetNetworkEndpointFromConfig(iface) var b strings.Builder b.WriteString("Network plan\n\n") @@ -77,7 +75,7 @@ func currentNetworkEndpoint(ctx context.Context, iface string, timeout time.Dura return ep, nil } -func targetNetworkEndpointFromConfig(logger *logging.Logger, iface string) (networkEndpoint, error) { +func targetNetworkEndpointFromConfig(iface string) (networkEndpoint, error) { ep := networkEndpoint{Interface: strings.TrimSpace(iface)} if ep.Interface == "" { return ep, fmt.Errorf("empty interface") diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index ddab036..c629435 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -231,7 +231,7 @@ func (o *Orchestrator) logStep(step int, format string, args ...interface{}) { if len(args) > 0 { message = fmt.Sprintf(format, args...) } - o.logger.Step("%s", message) + o.logger.Step("[%d] %s", step, message) } // SetUpdateInfo records version update information discovered by the CLI layer. @@ -1054,6 +1054,13 @@ func (s *BackupStats) toPrometheusMetrics() *metrics.BackupMetrics { } func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bundlePath string, err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return "", err + } + logger := o.logger fs := o.filesystem() dir := filepath.Dir(archivePath) @@ -1073,6 +1080,9 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu } for _, file := range associated[:3] { + if err := ctx.Err(); err != nil { + return "", err + } if _, err := fs.Stat(filepath.Join(dir, file)); err != nil { return "", fmt.Errorf("associated file not found: %s: %w", file, err) } @@ -1093,6 +1103,9 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu // Add each associated file to the tar archive for _, filename := range associated { + if err := ctx.Err(); err != nil { + return "", err + } filePath := filepath.Join(dir, filename) // Get file info @@ -1119,7 +1132,7 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu return "", fmt.Errorf("failed to open %s: %w", filename, err) } - if _, err := io.Copy(tw, file); err != nil { + if _, err := io.Copy(tw, &contextReader{ctx: ctx, r: file}); err != nil { file.Close() return "", fmt.Errorf("failed to write %s to tar: %w", filename, err) } @@ -1144,6 +1157,18 @@ func (o *Orchestrator) createBundle(ctx context.Context, archivePath string) (bu return bundlePath, nil } +type contextReader struct { + ctx context.Context + r io.Reader +} + +func (cr *contextReader) Read(p []byte) (int, error) { + if err := cr.ctx.Err(); err != nil { + return 0, err + } + return cr.r.Read(p) +} + func (o *Orchestrator) removeAssociatedFiles(archivePath string) error { logger := o.logger fs := o.filesystem() @@ -1474,6 +1499,8 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { cc.PVEBackupIncludePattern = cfg.PVEBackupIncludePattern cc.BackupCephConfig = cfg.BackupCephConfig cc.CephConfigPath = cfg.CephConfigPath + cc.PveshTimeoutSeconds = cfg.PveshTimeoutSeconds + cc.FsIoTimeoutSeconds = cfg.FsIoTimeoutSeconds cc.BackupDatastoreConfigs = cfg.BackupDatastoreConfigs cc.BackupPBSS3Endpoints = cfg.BackupPBSS3Endpoints @@ -1482,6 +1509,8 @@ func applyCollectorOverrides(cc 
*backup.CollectorConfig, cfg *config.Config) { cc.BackupPBSAcmePlugins = cfg.BackupPBSAcmePlugins cc.BackupPBSMetricServers = cfg.BackupPBSMetricServers cc.BackupPBSTrafficControl = cfg.BackupPBSTrafficControl + cc.BackupPBSNotifications = cfg.BackupPBSNotifications + cc.BackupPBSNotificationsPriv = cfg.BackupPBSNotificationsPriv cc.BackupUserConfigs = cfg.BackupUserConfigs cc.BackupRemoteConfigs = cfg.BackupRemoteConfigs cc.BackupSyncJobs = cfg.BackupSyncJobs @@ -1512,22 +1541,6 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { if cfg.PxarDatastoreConcurrency > 0 { cc.PxarDatastoreConcurrency = cfg.PxarDatastoreConcurrency } - if cfg.PxarIntraConcurrency > 0 { - cc.PxarIntraConcurrency = cfg.PxarIntraConcurrency - } - if cfg.PxarScanFanoutLevel > 0 { - cc.PxarScanFanoutLevel = cfg.PxarScanFanoutLevel - } - if cfg.PxarScanMaxRoots > 0 { - cc.PxarScanMaxRoots = cfg.PxarScanMaxRoots - } - cc.PxarStopOnCap = cfg.PxarStopOnCap - if cfg.PxarEnumWorkers > 0 { - cc.PxarEnumWorkers = cfg.PxarEnumWorkers - } - if cfg.PxarEnumBudgetMs >= 0 { - cc.PxarEnumBudgetMs = cfg.PxarEnumBudgetMs - } cc.PxarFileIncludePatterns = append([]string(nil), cfg.PxarFileIncludePatterns...) cc.PxarFileExcludePatterns = append([]string(nil), cfg.PxarFileExcludePatterns...) diff --git a/internal/orchestrator/orchestrator_test.go b/internal/orchestrator/orchestrator_test.go index 2b5be72..b4400e1 100644 --- a/internal/orchestrator/orchestrator_test.go +++ b/internal/orchestrator/orchestrator_test.go @@ -609,12 +609,6 @@ func TestApplyCollectorOverridesCopiesConfig(t *testing.T) { BaseDir: "/opt/proxsave", PxarDatastoreConcurrency: 3, - PxarIntraConcurrency: 4, - PxarScanFanoutLevel: 2, - PxarScanMaxRoots: 512, - PxarStopOnCap: true, - PxarEnumWorkers: 5, - PxarEnumBudgetMs: 100, PxarFileIncludePatterns: []string{"*.conf"}, PxarFileExcludePatterns: []string{"*.tmp"}, @@ -653,15 +647,8 @@ func TestApplyCollectorOverridesCopiesConfig(t *testing.T) { if cc.ScriptRepositoryPath != cfg.BaseDir { t.Fatalf("ScriptRepositoryPath = %s, want %s", cc.ScriptRepositoryPath, cfg.BaseDir) } - if cc.PxarDatastoreConcurrency != cfg.PxarDatastoreConcurrency || - cc.PxarIntraConcurrency != cfg.PxarIntraConcurrency || - cc.PxarScanFanoutLevel != cfg.PxarScanFanoutLevel || - cc.PxarScanMaxRoots != cfg.PxarScanMaxRoots || - cc.PxarEnumWorkers != cfg.PxarEnumWorkers { - t.Fatalf("Pxar concurrency fields not copied correctly") - } - if !cc.PxarStopOnCap || cc.PxarEnumBudgetMs != cfg.PxarEnumBudgetMs { - t.Fatalf("PxarStopOnCap or PxarEnumBudgetMs not copied") + if cc.PxarDatastoreConcurrency != cfg.PxarDatastoreConcurrency { + t.Fatalf("PxarDatastoreConcurrency not copied correctly") } if len(cc.PxarFileIncludePatterns) != 1 || cc.PxarFileIncludePatterns[0] != "*.conf" { t.Fatalf("PxarFileIncludePatterns not copied as expected: %#v", cc.PxarFileIncludePatterns) diff --git a/internal/orchestrator/pbs_api_apply.go b/internal/orchestrator/pbs_api_apply.go new file mode 100644 index 0000000..830bcc4 --- /dev/null +++ b/internal/orchestrator/pbs_api_apply.go @@ -0,0 +1,695 @@ +package orchestrator + +import ( + "context" + "encoding/json" + "fmt" + "os" + "sort" + "strings" + "time" + + "github.com/tis24dev/proxsave/internal/logging" +) + +func normalizeProxmoxCfgKey(key string) string { + key = strings.ToLower(strings.TrimSpace(key)) + key = strings.ReplaceAll(key, "_", "-") + return key +} + +func buildProxmoxManagerFlags(entries []proxmoxNotificationEntry, skipKeys ...string) []string { + if len(entries) == 0 { + 
return nil + } + skip := make(map[string]struct{}, len(skipKeys)+2) + for _, k := range skipKeys { + skip[normalizeProxmoxCfgKey(k)] = struct{}{} + } + // Common no-op keys + skip["digest"] = struct{}{} + skip["name"] = struct{}{} + + args := make([]string, 0, len(entries)*2) + for _, kv := range entries { + key := normalizeProxmoxCfgKey(kv.Key) + if key == "" { + continue + } + if _, ok := skip[key]; ok { + continue + } + value := strings.TrimSpace(kv.Value) + args = append(args, "--"+key) + args = append(args, value) + } + return args +} + +func popEntryValue(entries []proxmoxNotificationEntry, keys ...string) (value string, remaining []proxmoxNotificationEntry, ok bool) { + if len(entries) == 0 || len(keys) == 0 { + return "", entries, false + } + want := make(map[string]struct{}, len(keys)) + for _, k := range keys { + want[normalizeProxmoxCfgKey(k)] = struct{}{} + } + + remaining = make([]proxmoxNotificationEntry, 0, len(entries)) + for _, kv := range entries { + key := normalizeProxmoxCfgKey(kv.Key) + if _, match := want[key]; match && !ok { + value = strings.TrimSpace(kv.Value) + ok = true + continue + } + remaining = append(remaining, kv) + } + return value, remaining, ok +} + +func runPBSManagerRedacted(ctx context.Context, args []string, redactFlags []string, redactIndexes []int) ([]byte, error) { + out, err := restoreCmd.Run(ctx, "proxmox-backup-manager", args...) + if err == nil { + return out, nil + } + redacted := redactCLIArgs(args, redactFlags) + for _, idx := range redactIndexes { + if idx >= 0 && idx < len(redacted) { + redacted[idx] = "" + } + } + return out, fmt.Errorf("proxmox-backup-manager %s failed: %w", strings.Join(redacted, " "), err) +} + +func runPBSManager(ctx context.Context, args ...string) ([]byte, error) { + return runPBSManagerRedacted(ctx, args, nil, nil) +} + +func runPBSManagerSensitive(ctx context.Context, args []string, redactFlags ...string) ([]byte, error) { + return runPBSManagerRedacted(ctx, args, redactFlags, nil) +} + +func unwrapPBSJSONData(raw []byte) []byte { + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" { + return nil + } + var wrapper map[string]json.RawMessage + if err := json.Unmarshal([]byte(trimmed), &wrapper); err != nil { + return []byte(trimmed) + } + if data, ok := wrapper["data"]; ok && len(bytesTrimSpace(data)) > 0 { + return data + } + return []byte(trimmed) +} + +func bytesTrimSpace(b []byte) []byte { + return []byte(strings.TrimSpace(string(b))) +} + +func parsePBSListIDs(raw []byte, candidateKeys ...string) ([]string, error) { + data := unwrapPBSJSONData(raw) + if len(data) == 0 { + return nil, nil + } + + keys := make([]string, 0, len(candidateKeys)) + for _, k := range candidateKeys { + k = strings.TrimSpace(k) + if k == "" { + continue + } + keys = append(keys, k) + } + if len(keys) == 0 { + return nil, fmt.Errorf("no candidate keys provided for PBS list ID parsing") + } + + var rows []map[string]any + if err := json.Unmarshal(data, &rows); err != nil { + return nil, err + } + + out := make([]string, 0, len(rows)) + seen := make(map[string]struct{}, len(rows)) + for idx, row := range rows { + id := "" + for _, k := range keys { + v, ok := row[k] + if !ok || v == nil { + continue + } + s, ok := v.(string) + if !ok { + continue + } + id = strings.TrimSpace(s) + if id != "" { + break + } + } + if id == "" { + available := make([]string, 0, len(row)) + for k := range row { + available = append(available, k) + } + sort.Strings(available) + return nil, fmt.Errorf("failed to parse PBS list row %d: none of %v 
present as non-empty string (available keys: %v)", idx, keys, available) + } + if _, ok := seen[id]; ok { + continue + } + seen[id] = struct{}{} + out = append(out, id) + } + sort.Strings(out) + return out, nil +} + +func ensurePBSServicesForAPI(ctx context.Context, logger *logging.Logger) error { + if logger == nil { + logger = logging.GetDefaultLogger() + } + + if !isRealRestoreFS(restoreFS) { + return fmt.Errorf("non-system filesystem in use") + } + if os.Geteuid() != 0 { + return fmt.Errorf("requires root privileges") + } + + if _, err := restoreCmd.Run(ctx, "proxmox-backup-manager", "version"); err != nil { + return fmt.Errorf("proxmox-backup-manager not available: %w", err) + } + + // Best-effort: ensure services are started before API apply. + startCtx, cancel := context.WithTimeout(ctx, 2*serviceStartTimeout+serviceVerifyTimeout+5*time.Second) + defer cancel() + if err := startPBSServices(startCtx, logger); err != nil { + return err + } + return nil +} + +func applyPBSRemoteCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + remoteRaw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/remote.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(remoteRaw) + if err != nil { + return fmt.Errorf("parse staged remote.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + if strict { + out, err := runPBSManager(ctx, "remote", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse remote list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "remote", "remove", id); err != nil { + logger.Warning("PBS API apply: remote remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"remote", "create", id}, flags...) + if _, err := runPBSManagerSensitive(ctx, createArgs, "--password"); err != nil { + updateArgs := append([]string{"remote", "update", id}, flags...) 
+ if _, upErr := runPBSManagerSensitive(ctx, updateArgs, "--password"); upErr != nil { + return fmt.Errorf("remote %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSS3CfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + s3Raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/s3.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(s3Raw) + if err != nil { + return fmt.Errorf("parse staged s3.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "s3", "endpoint", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse s3 endpoint list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "s3", "endpoint", "remove", id); err != nil { + logger.Warning("PBS API apply: s3 endpoint remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"s3", "endpoint", "create", id}, flags...) + if _, err := runPBSManagerSensitive(ctx, createArgs, "--access-key", "--secret-key"); err != nil { + updateArgs := append([]string{"s3", "endpoint", "update", id}, flags...) + if _, upErr := runPBSManagerSensitive(ctx, updateArgs, "--access-key", "--secret-key"); upErr != nil { + return fmt.Errorf("s3 endpoint %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSDatastoreCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + dsRaw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/datastore.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(dsRaw) + if err != nil { + return fmt.Errorf("parse staged datastore.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + type dsRow struct { + Name string `json:"name"` + Store string `json:"store"` + ID string `json:"id"` + Path string `json:"path"` + } + currentPaths := make(map[string]string) + if out, err := runPBSManager(ctx, "datastore", "list", "--output-format=json"); err == nil { + var rows []dsRow + if err := json.Unmarshal(unwrapPBSJSONData(out), &rows); err == nil { + for _, row := range rows { + name := strings.TrimSpace(row.Name) + if name == "" { + name = strings.TrimSpace(row.Store) + } + if name == "" { + name = strings.TrimSpace(row.ID) + } + if name == "" { + continue + } + currentPaths[name] = strings.TrimSpace(row.Path) + } + } + } + + if strict { + current := make([]string, 0, len(currentPaths)) + for name := range currentPaths { + current = append(current, name) + } + sort.Strings(current) + for _, name := range current { + if _, ok := desired[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "datastore", "remove", 
name); err != nil { + logger.Warning("PBS API apply: datastore remove %s failed (continuing): %v", name, err) + } + } + } + + names := make([]string, 0, len(desired)) + for name := range desired { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + s := desired[name] + path, entries, ok := popEntryValue(s.Entries, "path") + if !ok || strings.TrimSpace(path) == "" { + logger.Warning("PBS API apply: datastore %s missing path; skipping", name) + continue + } + flags := buildProxmoxManagerFlags(entries) + if currentPath, exists := currentPaths[name]; exists { + if currentPath != "" && strings.TrimSpace(currentPath) != strings.TrimSpace(path) { + if strict { + if _, err := runPBSManager(ctx, "datastore", "remove", name); err != nil { + return fmt.Errorf("datastore %s: path mismatch (%s != %s) and remove failed: %w", name, currentPath, path, err) + } + createArgs := append([]string{"datastore", "create", name, path}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + return fmt.Errorf("datastore %s: recreate after path mismatch failed: %w", name, err) + } + continue + } + logger.Warning("PBS API apply: datastore %s path mismatch (%s != %s); leaving path unchanged (use Clean 1:1 restore to enforce 1:1)", name, currentPath, path) + } + + updateArgs := append([]string{"datastore", "update", name}, flags...) + if _, err := runPBSManager(ctx, updateArgs...); err != nil { + return fmt.Errorf("datastore %s: update failed: %w", name, err) + } + continue + } + + createArgs := append([]string{"datastore", "create", name, path}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"datastore", "update", name}, flags...) + if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("datastore %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} + +func applyPBSSyncCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/sync.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged sync.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "sync-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse sync-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "sync-job", "remove", id); err != nil { + logger.Warning("PBS API apply: sync-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"sync-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"sync-job", "update", id}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("sync-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSVerificationCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/verification.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged verification.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "verify-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse verify-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "verify-job", "remove", id); err != nil { + logger.Warning("PBS API apply: verify-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"verify-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"verify-job", "update", id}, flags...) + if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("verify-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSPruneCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/prune.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged prune.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + id := strings.TrimSpace(s.Name) + if id == "" { + continue + } + desired[id] = s + } + + if strict { + out, err := runPBSManager(ctx, "prune-job", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "id", "name") + if err != nil { + return fmt.Errorf("parse prune-job list: %w", err) + } + for _, id := range current { + if _, ok := desired[id]; ok { + continue + } + if _, err := runPBSManager(ctx, "prune-job", "remove", id); err != nil { + logger.Warning("PBS API apply: prune-job remove %s failed (continuing): %v", id, err) + } + } + } + + ids := make([]string, 0, len(desired)) + for id := range desired { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + s := desired[id] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"prune-job", "create", id}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"prune-job", "update", id}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("prune-job %s: %v (create) / %v (update)", id, err, upErr) + } + } + } + + return nil +} + +func applyPBSTrafficControlCfgViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/traffic-control.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged traffic-control.cfg: %w", err) + } + + desired := make(map[string]proxmoxNotificationSection, len(sections)) + for _, s := range sections { + name := strings.TrimSpace(s.Name) + if name == "" { + continue + } + desired[name] = s + } + + if strict { + out, err := runPBSManager(ctx, "traffic-control", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse traffic-control list: %w", err) + } + for _, name := range current { + if _, ok := desired[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "traffic-control", "remove", name); err != nil { + logger.Warning("PBS API apply: traffic-control remove %s failed (continuing): %v", name, err) + } + } + } + + names := make([]string, 0, len(desired)) + for name := range desired { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + s := desired[name] + flags := buildProxmoxManagerFlags(s.Entries) + createArgs := append([]string{"traffic-control", "create", name}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"traffic-control", "update", name}, flags...) + if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("traffic-control %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} + +func applyPBSNodeCfgViaAPI(ctx context.Context, stageRoot string) error { + raw, present, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/node.cfg") + if err != nil { + return err + } + if !present { + return nil + } + sections, err := parseProxmoxNotificationSections(raw) + if err != nil { + return fmt.Errorf("parse staged node.cfg: %w", err) + } + if len(sections) == 0 { + return nil + } + // node update applies to the local node; use the first section. + flags := buildProxmoxManagerFlags(sections[0].Entries) + args := append([]string{"node", "update"}, flags...) 
+ if _, err := runPBSManager(ctx, args...); err != nil { + return err + } + return nil +} diff --git a/internal/orchestrator/pbs_notifications_api_apply.go b/internal/orchestrator/pbs_notifications_api_apply.go new file mode 100644 index 0000000..032debb --- /dev/null +++ b/internal/orchestrator/pbs_notifications_api_apply.go @@ -0,0 +1,243 @@ +package orchestrator + +import ( + "context" + "fmt" + "sort" + "strings" + + "github.com/tis24dev/proxsave/internal/logging" +) + +func applyPBSNotificationsViaAPI(ctx context.Context, logger *logging.Logger, stageRoot string, strict bool) error { + cfgRaw, cfgPresent, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/notifications.cfg") + if err != nil { + return err + } + if !cfgPresent { + return nil + } + privRaw, _, err := readStageFileOptional(stageRoot, "etc/proxmox-backup/notifications-priv.cfg") + if err != nil { + return err + } + + cfgSections, err := parseProxmoxNotificationSections(cfgRaw) + if err != nil { + return fmt.Errorf("parse staged notifications.cfg: %w", err) + } + privSections, err := parseProxmoxNotificationSections(privRaw) + if err != nil { + return fmt.Errorf("parse staged notifications-priv.cfg: %w", err) + } + + privByKey := make(map[string][]proxmoxNotificationEntry) + privRedactFlagsByKey := make(map[string][]string) + for _, s := range privSections { + if strings.TrimSpace(s.Type) == "" || strings.TrimSpace(s.Name) == "" { + continue + } + key := fmt.Sprintf("%s:%s", strings.TrimSpace(s.Type), strings.TrimSpace(s.Name)) + privByKey[key] = append([]proxmoxNotificationEntry{}, s.Entries...) + privRedactFlagsByKey[key] = append([]string(nil), notificationRedactFlagsFromEntries(s.Entries)...) + } + + type endpointSection struct { + section proxmoxNotificationSection + redactFlags []string + redactIndex []int + positional []string + sectionKey string + endpointType string + } + + var endpoints []endpointSection + var matchers []proxmoxNotificationSection + + for _, s := range cfgSections { + typ := strings.TrimSpace(s.Type) + name := strings.TrimSpace(s.Name) + if typ == "" || name == "" { + continue + } + switch typ { + case "smtp", "sendmail", "gotify", "webhook": + key := fmt.Sprintf("%s:%s", typ, name) + if priv, ok := privByKey[key]; ok && len(priv) > 0 { + s.Entries = append(s.Entries, priv...) + } + redactFlags := notificationRedactFlags(s) + if extra := privRedactFlagsByKey[key]; len(extra) > 0 { + redactFlags = append(redactFlags, extra...) 
+ } + + pos := []string{} + entries := s.Entries + + switch typ { + case "smtp": + recipients, remaining, ok := popEntryValue(entries, "recipients", "mailto", "mail-to") + if !ok || strings.TrimSpace(recipients) == "" { + logger.Warning("PBS notifications API apply: smtp endpoint %s missing recipients; skipping", name) + continue + } + pos = append(pos, recipients) + s.Entries = remaining + case "sendmail": + mailto, remaining, ok := popEntryValue(entries, "mailto", "mail-to", "recipients") + if !ok || strings.TrimSpace(mailto) == "" { + logger.Warning("PBS notifications API apply: sendmail endpoint %s missing mailto; skipping", name) + continue + } + pos = append(pos, mailto) + s.Entries = remaining + case "gotify": + server, remaining, ok := popEntryValue(entries, "server") + if !ok || strings.TrimSpace(server) == "" { + logger.Warning("PBS notifications API apply: gotify endpoint %s missing server; skipping", name) + continue + } + token, remaining2, ok := popEntryValue(remaining, "token") + if !ok || strings.TrimSpace(token) == "" { + logger.Warning("PBS notifications API apply: gotify endpoint %s missing token; skipping", name) + continue + } + pos = append(pos, server, token) + s.Entries = remaining2 + case "webhook": + url, remaining, ok := popEntryValue(entries, "url") + if !ok || strings.TrimSpace(url) == "" { + logger.Warning("PBS notifications API apply: webhook endpoint %s missing url; skipping", name) + continue + } + pos = append(pos, url) + s.Entries = remaining + } + + redactIndex := []int(nil) + if typ == "gotify" { + // proxmox-backup-manager notification endpoint gotify create/update + redactIndex = []int{6} + } + + endpoints = append(endpoints, endpointSection{ + section: s, + redactFlags: redactFlags, + redactIndex: redactIndex, + positional: pos, + sectionKey: key, + endpointType: typ, + }) + case "matcher": + matchers = append(matchers, s) + default: + logger.Warning("PBS notifications API apply: unknown section %q (%s); skipping", typ, name) + } + } + + // In strict mode, remove matchers first so endpoint cleanup isn't blocked by references. + desiredMatchers := make(map[string]proxmoxNotificationSection, len(matchers)) + for _, m := range matchers { + name := strings.TrimSpace(m.Name) + if name == "" { + continue + } + desiredMatchers[name] = m + } + + matcherNames := make([]string, 0, len(desiredMatchers)) + for name := range desiredMatchers { + matcherNames = append(matcherNames, name) + } + sort.Strings(matcherNames) + + if strict { + out, err := runPBSManager(ctx, "notification", "matcher", "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse matcher list: %w", err) + } + for _, name := range current { + if _, ok := desiredMatchers[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "notification", "matcher", "remove", name); err != nil { + // Built-in matchers may not be removable; keep going. + logger.Warning("PBS notifications API apply: matcher remove %s failed (continuing): %v", name, err) + } + } + } + + // Endpoints first (matchers refer to targets/endpoints). 
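// Aside: the endpoint handling above pops the mandatory positional values
// (recipients, mailto, server/token, url) out of the parsed section entries
// and converts the remaining pairs into --flag arguments. A small,
// self-contained sketch of that pop-then-flag idea, using simplified stand-in
// types rather than the repository's proxmoxNotificationEntry:
package main

import (
	"fmt"
	"strings"
)

type entry struct{ Key, Value string }

func normalizeKey(k string) string {
	return strings.ReplaceAll(strings.ToLower(strings.TrimSpace(k)), "_", "-")
}

// popValue removes the first entry whose normalized key matches one of the
// candidate keys and returns its value plus the remaining entries.
func popValue(entries []entry, keys ...string) (string, []entry, bool) {
	want := make(map[string]struct{}, len(keys))
	for _, k := range keys {
		want[normalizeKey(k)] = struct{}{}
	}
	var value string
	found := false
	remaining := make([]entry, 0, len(entries))
	for _, e := range entries {
		if _, ok := want[normalizeKey(e.Key)]; ok && !found {
			value = strings.TrimSpace(e.Value)
			found = true
			continue
		}
		remaining = append(remaining, e)
	}
	return value, remaining, found
}

func main() {
	smtp := []entry{
		{"recipients", "user@example.com"},
		{"server", "smtp.example.com"},
		{"port", "587"},
	}
	to, rest, ok := popValue(smtp, "recipients", "mailto", "mail-to")
	fmt.Println(ok, to) // true user@example.com
	for _, e := range rest {
		fmt.Printf("--%s %s\n", normalizeKey(e.Key), e.Value) // --server ..., --port 587
	}
}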
+ for _, typ := range []string{"smtp", "sendmail", "gotify", "webhook"} { + desiredNames := make(map[string]endpointSection) + for _, e := range endpoints { + if e.endpointType != typ { + continue + } + name := strings.TrimSpace(e.section.Name) + if name == "" { + continue + } + desiredNames[name] = e + } + + names := make([]string, 0, len(desiredNames)) + for name := range desiredNames { + names = append(names, name) + } + sort.Strings(names) + + if strict { + out, err := runPBSManager(ctx, "notification", "endpoint", typ, "list", "--output-format=json") + if err != nil { + return err + } + current, err := parsePBSListIDs(out, "name", "id") + if err != nil { + return fmt.Errorf("parse endpoint list (%s): %w", typ, err) + } + for _, name := range current { + if _, ok := desiredNames[name]; ok { + continue + } + if _, err := runPBSManager(ctx, "notification", "endpoint", typ, "remove", name); err != nil { + // Built-in endpoints may not be removable; keep going. + logger.Warning("PBS notifications API apply: endpoint remove %s:%s failed (continuing): %v", typ, name, err) + } + } + } + + for _, name := range names { + e := desiredNames[name] + flags := buildProxmoxManagerFlags(e.section.Entries) + createArgs := append([]string{"notification", "endpoint", typ, "create", name}, e.positional...) + createArgs = append(createArgs, flags...) + if _, err := runPBSManagerRedacted(ctx, createArgs, e.redactFlags, e.redactIndex); err != nil { + updateArgs := append([]string{"notification", "endpoint", typ, "update", name}, e.positional...) + updateArgs = append(updateArgs, flags...) + if _, upErr := runPBSManagerRedacted(ctx, updateArgs, e.redactFlags, e.redactIndex); upErr != nil { + return fmt.Errorf("endpoint %s:%s: %v (create) / %v (update)", typ, name, err, upErr) + } + } + } + } + + // Then matchers. + for _, name := range matcherNames { + m := desiredMatchers[name] + flags := buildProxmoxManagerFlags(m.Entries) + createArgs := append([]string{"notification", "matcher", "create", name}, flags...) + if _, err := runPBSManager(ctx, createArgs...); err != nil { + updateArgs := append([]string{"notification", "matcher", "update", name}, flags...) 
+ if _, upErr := runPBSManager(ctx, updateArgs...); upErr != nil { + return fmt.Errorf("matcher %s: %v (create) / %v (update)", name, err, upErr) + } + } + } + + return nil +} diff --git a/internal/orchestrator/pbs_notifications_api_apply_test.go b/internal/orchestrator/pbs_notifications_api_apply_test.go new file mode 100644 index 0000000..43dac96 --- /dev/null +++ b/internal/orchestrator/pbs_notifications_api_apply_test.go @@ -0,0 +1,62 @@ +package orchestrator + +import ( + "context" + "os" + "reflect" + "testing" + + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestApplyPBSNotificationsViaAPI_CreatesEndpointAndMatcher(t *testing.T) { + origCmd := restoreCmd + origFS := restoreFS + t.Cleanup(func() { + restoreCmd = origCmd + restoreFS = origFS + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + stageRoot := "/stage" + + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/notifications.cfg", []byte( + "smtp: Gmail-relay\n"+ + " recipients user@example.com\n"+ + " from-address pbs@example.com\n"+ + " server smtp.gmail.com\n"+ + " port 587\n"+ + " username user\n"+ + "\n"+ + "matcher: default-matcher\n"+ + " target Gmail-relay\n", + ), 0o640); err != nil { + t.Fatalf("write staged notifications.cfg: %v", err) + } + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/notifications-priv.cfg", []byte( + "smtp: Gmail-relay\n"+ + " password secret123\n", + ), 0o600); err != nil { + t.Fatalf("write staged notifications-priv.cfg: %v", err) + } + + runner := &fakeCommandRunner{} + restoreCmd = runner + + logger := logging.New(types.LogLevelDebug, false) + if err := applyPBSNotificationsViaAPI(context.Background(), logger, stageRoot, false); err != nil { + t.Fatalf("applyPBSNotificationsViaAPI error: %v", err) + } + + want := []string{ + "proxmox-backup-manager notification endpoint smtp create Gmail-relay user@example.com --from-address pbs@example.com --server smtp.gmail.com --port 587 --username user --password secret123", + "proxmox-backup-manager notification matcher create default-matcher --target Gmail-relay", + } + if !reflect.DeepEqual(runner.calls, want) { + t.Fatalf("calls=%v want %v", runner.calls, want) + } +} diff --git a/internal/orchestrator/pbs_restore_behavior.go b/internal/orchestrator/pbs_restore_behavior.go new file mode 100644 index 0000000..a55b57d --- /dev/null +++ b/internal/orchestrator/pbs_restore_behavior.go @@ -0,0 +1,33 @@ +package orchestrator + +// PBSRestoreBehavior controls how PBS objects are reconciled during staged apply. +// It is intentionally chosen at restore time (UI), not via backup.env. 
+type PBSRestoreBehavior int
+
+const (
+	PBSRestoreBehaviorUnspecified PBSRestoreBehavior = iota
+	PBSRestoreBehaviorMerge
+	PBSRestoreBehaviorClean
+)
+
+func (b PBSRestoreBehavior) String() string {
+	switch b {
+	case PBSRestoreBehaviorMerge:
+		return "merge"
+	case PBSRestoreBehaviorClean:
+		return "clean-1to1"
+	default:
+		return "unspecified"
+	}
+}
+
+func (b PBSRestoreBehavior) DisplayName() string {
+	switch b {
+	case PBSRestoreBehaviorMerge:
+		return "Merge (existing PBS)"
+	case PBSRestoreBehaviorClean:
+		return "Clean 1:1 (fresh PBS install)"
+	default:
+		return "Unspecified"
+	}
+}
diff --git a/internal/orchestrator/pbs_staged_apply.go b/internal/orchestrator/pbs_staged_apply.go
index 3111daa..be72037 100644
--- a/internal/orchestrator/pbs_staged_apply.go
+++ b/internal/orchestrator/pbs_staged_apply.go
@@ -2,6 +2,7 @@ package orchestrator
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
@@ -39,34 +40,151 @@ func maybeApplyPBSConfigsFromStage(ctx context.Context, logger *logging.Logger,
 		return nil
 	}
 
-	if plan.HasCategoryID("datastore_pbs") {
-		if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil {
-			logger.Warning("PBS staged apply: s3.cfg: %v", err)
+	behavior := plan.PBSRestoreBehavior
+	strict := behavior == PBSRestoreBehaviorClean
+	allowFileFallback := behavior == PBSRestoreBehaviorClean
+
+	needsAPI := plan.HasCategoryID("pbs_host") || plan.HasCategoryID("datastore_pbs") || plan.HasCategoryID("pbs_remotes") || plan.HasCategoryID("pbs_jobs")
+	apiAvailable := false
+	if needsAPI {
+		if err := ensurePBSServicesForAPI(ctx, logger); err != nil {
+			if allowFileFallback {
+				logger.Warning("PBS API apply unavailable; falling back to file-based staged apply where possible: %v", err)
+			} else {
+				logger.Warning("PBS API apply unavailable; skipping API-applied PBS categories (merge mode): %v", err)
+			}
+		} else {
+			apiAvailable = true
 		}
-		if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil {
-			logger.Warning("PBS staged apply: datastore.cfg: %v", err)
+	}
+
+	if plan.HasCategoryID("pbs_host") {
+		// Always restore file-only configs (no stable API coverage yet).
+		// ACME should be applied before node config (node.cfg references ACME accounts/plugins).
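// Aside: a compact, self-contained illustration of how the selected
// PBSRestoreBehavior maps onto the strict/allowFileFallback switches used in
// maybeApplyPBSConfigsFromStage above. The local type and helper below are
// hypothetical stand-ins, not the orchestrator's actual API.
package main

import "fmt"

type behavior int

const (
	merge behavior = iota
	clean
)

// applyPolicy captures the two derived switches: Clean 1:1 reconciles
// strictly (removing objects absent from the backup) and may fall back to
// file-based apply when the PBS API is unavailable; Merge never deletes and
// requires the API.
type applyPolicy struct {
	strict            bool
	allowFileFallback bool
}

func policyFor(b behavior) applyPolicy {
	return applyPolicy{
		strict:            b == clean,
		allowFileFallback: b == clean,
	}
}

func main() {
	fmt.Printf("merge: %+v\n", policyFor(merge)) // {strict:false allowFileFallback:false}
	fmt.Printf("clean: %+v\n", policyFor(clean)) // {strict:true allowFileFallback:true}
}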
+ for _, rel := range []string{ + "etc/proxmox-backup/acme/accounts.cfg", + "etc/proxmox-backup/acme/plugins.cfg", + "etc/proxmox-backup/metricserver.cfg", + "etc/proxmox-backup/proxy.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } + + if apiAvailable { + if err := applyPBSTrafficControlCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: traffic-control failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based traffic-control.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/traffic-control.cfg") + } + } + if err := applyPBSNodeCfgViaAPI(ctx, stageRoot); err != nil { + logger.Warning("PBS API apply: node config failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based node.cfg") + _ = applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/node.cfg") + } + } + } else if allowFileFallback { + for _, rel := range []string{ + "etc/proxmox-backup/traffic-control.cfg", + "etc/proxmox-backup/node.cfg", + } { + if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { + logger.Warning("PBS staged apply: %s: %v", rel, err) + } + } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping node.cfg/traffic-control.cfg: merge mode requires PBS API apply") } } - if plan.HasCategoryID("pbs_jobs") { - if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: job configs: %v", err) + + if plan.HasCategoryID("datastore_pbs") { + if apiAvailable { + if err := applyPBSS3CfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: s3.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based s3.cfg") + _ = applyPBSS3CfgFromStage(ctx, logger, stageRoot) + } + } + if err := applyPBSDatastoreCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: datastore.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based datastore.cfg") + _ = applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot) + } + } + } else if allowFileFallback { + if err := applyPBSS3CfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: s3.cfg: %v", err) + } + if err := applyPBSDatastoreCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: datastore.cfg: %v", err) + } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping datastore.cfg/s3.cfg: merge mode requires PBS API apply") } } + if plan.HasCategoryID("pbs_remotes") { - if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: remote.cfg: %v", err) + if apiAvailable { + if err := applyPBSRemoteCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: remote.cfg failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based remote.cfg") + _ = applyPBSRemoteCfgFromStage(ctx, logger, stageRoot) + } + } + } else if allowFileFallback { + if err := applyPBSRemoteCfgFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: remote.cfg: %v", err) + } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping remote.cfg: merge 
mode requires PBS API apply") } } - if plan.HasCategoryID("pbs_host") { - if err := applyPBSHostConfigsFromStage(ctx, logger, stageRoot); err != nil { - logger.Warning("PBS staged apply: host configs: %v", err) + + if plan.HasCategoryID("pbs_jobs") { + if apiAvailable { + if err := applyPBSSyncCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: sync jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + } + if err := applyPBSVerificationCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: verification jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + } + if err := applyPBSPruneCfgViaAPI(ctx, logger, stageRoot, strict); err != nil { + logger.Warning("PBS API apply: prune jobs failed: %v", err) + if allowFileFallback { + logger.Warning("PBS staged apply: falling back to file-based job configs") + _ = applyPBSJobConfigsFromStage(ctx, logger, stageRoot) + } + } + } else if allowFileFallback { + if err := applyPBSJobConfigsFromStage(ctx, logger, stageRoot); err != nil { + logger.Warning("PBS staged apply: job configs: %v", err) + } + } else { + logging.DebugStep(logger, "pbs staged apply", "Skipping sync/verification/prune configs: merge mode requires PBS API apply") } } + if plan.HasCategoryID("pbs_tape") { if err := applyPBSTapeConfigsFromStage(ctx, logger, stageRoot); err != nil { logger.Warning("PBS staged apply: tape configs: %v", err) } } + return nil } @@ -84,27 +202,6 @@ func applyPBSS3CfgFromStage(ctx context.Context, logger *logging.Logger, stageRo return applyPBSConfigFileFromStage(ctx, logger, stageRoot, "etc/proxmox-backup/s3.cfg") } -func applyPBSHostConfigsFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { - done := logging.DebugStart(logger, "pbs staged apply host configs", "stage=%s", stageRoot) - defer func() { done(err) }() - - // ACME should be applied before node.cfg (node.cfg references ACME account/plugins). 
- paths := []string{ - "etc/proxmox-backup/acme/accounts.cfg", - "etc/proxmox-backup/acme/plugins.cfg", - "etc/proxmox-backup/metricserver.cfg", - "etc/proxmox-backup/traffic-control.cfg", - "etc/proxmox-backup/proxy.cfg", - "etc/proxmox-backup/node.cfg", - } - for _, rel := range paths { - if err := applyPBSConfigFileFromStage(ctx, logger, stageRoot, rel); err != nil { - logger.Warning("PBS staged apply: %s: %v", rel, err) - } - } - return nil -} - func applyPBSTapeConfigsFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { done := logging.DebugStart(logger, "pbs staged apply tape configs", "stage=%s", stageRoot) defer func() { done(err) }() @@ -170,6 +267,32 @@ func applyPBSDatastoreCfgFromStage(ctx context.Context, logger *logging.Logger, return nil } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + logger.Warning("PBS staged apply: staged datastore.cfg looks invalid (%s); attempting recovery from pbs_datastore_inventory.json", reason) + if recovered, src, recErr := loadPBSDatastoreCfgFromInventory(stageRoot); recErr != nil { + logger.Warning("PBS staged apply: unable to recover datastore.cfg from inventory (%v); leaving current configuration unchanged", recErr) + return nil + } else if strings.TrimSpace(recovered) == "" { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s is empty; leaving current configuration unchanged", src) + return nil + } else { + normalized, fixed = normalizePBSDatastoreCfgContent(recovered) + if fixed > 0 { + logger.Warning("PBS staged apply: recovered datastore.cfg normalization fixed %d malformed line(s) (properties must be indented)", fixed) + } + blocks, err = parsePBSDatastoreCfgBlocks(normalized) + if err != nil { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s is still invalid (%v); leaving current configuration unchanged", src, err) + return nil + } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + logger.Warning("PBS staged apply: recovered datastore.cfg from %s still looks invalid (%s); leaving current configuration unchanged", src, reason) + return nil + } + logger.Info("PBS staged apply: datastore.cfg recovered from %s", src) + } + } + var applyBlocks []pbsDatastoreBlock var deferred []pbsDatastoreBlock for _, b := range blocks { @@ -213,6 +336,90 @@ func applyPBSDatastoreCfgFromStage(ctx context.Context, logger *logging.Logger, return nil } +type pbsDatastoreInventoryRestoreLite struct { + Files map[string]struct { + Content string `json:"content"` + } `json:"files"` + Datastores []struct { + Name string `json:"name"` + Path string `json:"path"` + Comment string `json:"comment"` + } `json:"datastores"` +} + +func loadPBSDatastoreCfgFromInventory(stageRoot string) (string, string, error) { + inventoryPath := filepath.Join(stageRoot, "var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json") + raw, err := restoreFS.ReadFile(inventoryPath) + if err != nil { + return "", "", fmt.Errorf("read inventory %s: %w", inventoryPath, err) + } + trimmed := strings.TrimSpace(string(raw)) + if trimmed == "" { + return "", "", fmt.Errorf("inventory %s is empty", inventoryPath) + } + + var report pbsDatastoreInventoryRestoreLite + if err := json.Unmarshal([]byte(trimmed), &report); err != nil { + return "", "", fmt.Errorf("parse inventory %s: %w", inventoryPath, err) + } + + if report.Files != nil { + if snap := strings.TrimSpace(report.Files["pbs_datastore_cfg"].Content); snap != "" { + return report.Files["pbs_datastore_cfg"].Content, 
"pbs_datastore_inventory.json.files[pbs_datastore_cfg].content", nil + } + } + + // Fallback: generate a minimal datastore.cfg from the inventory's datastore list. + var out strings.Builder + for _, ds := range report.Datastores { + name := strings.TrimSpace(ds.Name) + path := strings.TrimSpace(ds.Path) + if name == "" || path == "" { + continue + } + if out.Len() > 0 { + out.WriteString("\n") + } + out.WriteString(fmt.Sprintf("datastore: %s\n", name)) + if comment := strings.TrimSpace(ds.Comment); comment != "" { + out.WriteString(fmt.Sprintf(" comment %s\n", comment)) + } + out.WriteString(fmt.Sprintf(" path %s\n", path)) + } + + generated := strings.TrimSpace(out.String()) + if generated == "" { + return "", "", fmt.Errorf("inventory %s contains no usable datastore definitions", inventoryPath) + } + return out.String(), "pbs_datastore_inventory.json.datastores", nil +} + +func detectPBSDatastoreCfgDuplicateKeys(blocks []pbsDatastoreBlock) string { + for _, block := range blocks { + seen := map[string]int{} + for _, line := range block.Lines { + trimmed := strings.TrimSpace(line) + if trimmed == "" || strings.HasPrefix(trimmed, "#") || strings.HasPrefix(trimmed, "datastore:") { + continue + } + + fields := strings.Fields(trimmed) + if len(fields) == 0 { + continue + } + key := strings.TrimSpace(fields[0]) + if key == "" { + continue + } + seen[key]++ + if seen[key] > 1 { + return fmt.Sprintf("datastore %s has duplicate key %q", strings.TrimSpace(block.Name), key) + } + } + } + return "" +} + func parsePBSDatastoreCfgBlocks(content string) ([]pbsDatastoreBlock, error) { var blocks []pbsDatastoreBlock var current *pbsDatastoreBlock @@ -293,7 +500,7 @@ func shouldApplyPBSDatastoreBlock(block pbsDatastoreBlock, logger *logging.Logge } if hasData { - if warn := validatePBSDatastoreReadOnly(path, logger); warn != "" { + if warn := validatePBSDatastoreReadOnly(path); warn != "" && logger != nil { logger.Warning("PBS datastore preflight: %s", warn) } return true, "" diff --git a/internal/orchestrator/pbs_staged_apply_test.go b/internal/orchestrator/pbs_staged_apply_test.go index ecb7abb..0ee7ab7 100644 --- a/internal/orchestrator/pbs_staged_apply_test.go +++ b/internal/orchestrator/pbs_staged_apply_test.go @@ -3,6 +3,7 @@ package orchestrator import ( "context" "os" + "strings" "testing" ) @@ -76,3 +77,71 @@ func TestShouldApplyPBSDatastoreBlock_AllowsMountLikePathsOnRootFS(t *testing.T) t.Fatalf("expected datastore block to be applied, got ok=false reason=%q", reason) } } + +func TestApplyPBSDatastoreCfgFromStage_RecoversFromInventoryWhenFlattened(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + stageRoot := "/stage" + + // This is a representative "flattened" datastore.cfg produced by an unsafe prefilter + // (headers separated from their respective properties). 
+ staged := strings.Join([]string{ + "comment Local ext4 disk datastore", + "comment Synology NFS sync target", + "datastore: Data1", + "datastore: Synology-Archive", + "gc-schedule 05:00", + "gc-schedule 06:30", + "notification-mode notification-system", + "notification-mode notification-system", + "path /mnt/Synology_NFS/PBS_Backup", + "path /mnt/datastore/Data1", + "", + }, "\n") + if err := fakeFS.WriteFile(stageRoot+"/etc/proxmox-backup/datastore.cfg", []byte(staged), 0o640); err != nil { + t.Fatalf("write staged datastore.cfg: %v", err) + } + + // Inventory contains a verbatim snapshot of the original datastore.cfg, which should be preferred. + inventory := `{"files":{"pbs_datastore_cfg":{"content":"datastore: Synology-Archive\n comment Synology NFS sync target\n gc-schedule 05:00\n notification-mode notification-system\n path /mnt/Synology_NFS/PBS_Backup\n\ndatastore: Data1\n comment Local ext4 disk datastore\n gc-schedule 06:30\n notification-mode notification-system\n path /mnt/datastore/Data1\n"}}}` + if err := fakeFS.WriteFile(stageRoot+"/var/lib/proxsave-info/commands/pbs/pbs_datastore_inventory.json", []byte(inventory), 0o640); err != nil { + t.Fatalf("write inventory: %v", err) + } + + if err := applyPBSDatastoreCfgFromStage(context.Background(), newTestLogger(), stageRoot); err != nil { + t.Fatalf("applyPBSDatastoreCfgFromStage error: %v", err) + } + + out, err := fakeFS.ReadFile("/etc/proxmox-backup/datastore.cfg") + if err != nil { + t.Fatalf("read restored datastore.cfg: %v", err) + } + + blocks, err := parsePBSDatastoreCfgBlocks(string(out)) + if err != nil { + t.Fatalf("parse restored datastore.cfg: %v", err) + } + if len(blocks) != 2 { + t.Fatalf("expected 2 datastore blocks, got %d", len(blocks)) + } + if reason := detectPBSDatastoreCfgDuplicateKeys(blocks); reason != "" { + t.Fatalf("restored datastore.cfg still has duplicate keys: %s", reason) + } + + // Verify the expected datastore paths are preserved. 
+ paths := map[string]string{} + for _, b := range blocks { + paths[b.Name] = b.Path + } + if paths["Synology-Archive"] != "/mnt/Synology_NFS/PBS_Backup" { + t.Fatalf("Synology-Archive path=%q", paths["Synology-Archive"]) + } + if paths["Data1"] != "/mnt/datastore/Data1" { + t.Fatalf("Data1 path=%q", paths["Data1"]) + } +} diff --git a/internal/orchestrator/pve_safe_apply_mappings.go b/internal/orchestrator/pve_safe_apply_mappings.go index e907a6f..dea21ea 100644 --- a/internal/orchestrator/pve_safe_apply_mappings.go +++ b/internal/orchestrator/pve_safe_apply_mappings.go @@ -315,4 +315,3 @@ func renderMappingEntry(entry map[string]string) string { } return strings.Join(parts, ",") } - diff --git a/internal/orchestrator/pve_staged_apply_test.go b/internal/orchestrator/pve_staged_apply_test.go index 6015116..a5561be 100644 --- a/internal/orchestrator/pve_staged_apply_test.go +++ b/internal/orchestrator/pve_staged_apply_test.go @@ -19,7 +19,7 @@ func TestPVEStorageMountGuardItems_BuildsExpectedTargets(t *testing.T) { fstabMounts := map[string]struct{}{ "/mnt/datastore": {}, "/mnt/Synology_NFS": {}, - "/": {}, + "/": {}, } items := pveStorageMountGuardItems(candidates, mountCandidates, fstabMounts) diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 0369703..2f14d20 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -26,16 +26,15 @@ import ( var ErrRestoreAborted = errors.New("restore workflow aborted by user") var ( - serviceStopTimeout = 45 * time.Second - serviceStopNoBlockTimeout = 15 * time.Second - serviceStartTimeout = 30 * time.Second - serviceVerifyTimeout = 30 * time.Second - serviceStatusCheckTimeout = 5 * time.Second - servicePollInterval = 500 * time.Millisecond - serviceRetryDelay = 500 * time.Millisecond - restoreLogSequence uint64 - restoreGlob = filepath.Glob - prepareDecryptedBackupFunc = prepareDecryptedBackup + serviceStopTimeout = 45 * time.Second + serviceStopNoBlockTimeout = 15 * time.Second + serviceStartTimeout = 30 * time.Second + serviceVerifyTimeout = 30 * time.Second + serviceStatusCheckTimeout = 5 * time.Second + servicePollInterval = 500 * time.Millisecond + serviceRetryDelay = 500 * time.Millisecond + restoreLogSequence uint64 + restoreGlob = filepath.Glob ) // RestoreAbortInfo contains information about an aborted restore with network rollback. @@ -763,8 +762,17 @@ func confirmRestoreAction(ctx context.Context, reader *bufio.Reader, cand *decry manifest := cand.Manifest fmt.Println() fmt.Printf("Selected backup: %s (%s)\n", cand.DisplayBase, manifest.CreatedAt.Format("2006-01-02 15:04:05")) - fmt.Println("Restore destination: / (system root; original paths will be preserved)") - fmt.Println("WARNING: This operation will overwrite configuration files on this system.") + cleanDest := filepath.Clean(strings.TrimSpace(dest)) + if cleanDest == "" || cleanDest == "." 
{ + cleanDest = string(os.PathSeparator) + } + if cleanDest == string(os.PathSeparator) { + fmt.Println("Restore destination: / (system root; original paths will be preserved)") + fmt.Println("WARNING: This operation will overwrite configuration files on this system.") + } else { + fmt.Printf("Restore destination: %s (original paths will be preserved under this directory)\n", cleanDest) + fmt.Printf("WARNING: This operation will overwrite existing files under %s.\n", cleanDest) + } fmt.Println("Type RESTORE to proceed or 0 to cancel.") for { @@ -978,7 +986,7 @@ func applyVMConfigs(ctx context.Context, entries []vmEntry, logger *logging.Logg logger.Warning("VM apply aborted: %v", err) return applied, failed } - target := fmt.Sprintf("/nodes/%s/%s/%s/config", detectNodeForVM(vm), vm.Kind, vm.VMID) + target := fmt.Sprintf("/nodes/%s/%s/%s/config", detectNodeForVM(), vm.Kind, vm.VMID) args := []string{"set", target, "--filename", vm.Path} if err := runPvesh(ctx, logger, args); err != nil { logger.Warning("Failed to apply %s (vmid=%s kind=%s): %v", target, vm.VMID, vm.Kind, err) @@ -995,7 +1003,7 @@ func applyVMConfigs(ctx context.Context, entries []vmEntry, logger *logging.Logg return applied, failed } -func detectNodeForVM(vm vmEntry) string { +func detectNodeForVM() string { host, _ := os.Hostname() host = shortHost(host) if host != "" { @@ -1564,7 +1572,7 @@ func extractTarEntry(tarReader *tar.Reader, header *tar.Header, destRoot string, case tar.TypeSymlink: return extractSymlink(target, header, cleanDestRoot, logger) case tar.TypeLink: - return extractHardlink(target, header, cleanDestRoot, logger) + return extractHardlink(target, header, cleanDestRoot) default: logger.Debug("Skipping unsupported file type %d: %s", header.Typeflag, header.Name) return nil @@ -1715,7 +1723,7 @@ func extractSymlink(target string, header *tar.Header, destRoot string, logger * } // extractHardlink creates a hard link -func extractHardlink(target string, header *tar.Header, destRoot string, logger *logging.Logger) error { +func extractHardlink(target string, header *tar.Header, destRoot string) error { // Validate hard link target linkName := header.Linkname diff --git a/internal/orchestrator/restore_access_control.go b/internal/orchestrator/restore_access_control.go index efdda05..118168f 100644 --- a/internal/orchestrator/restore_access_control.go +++ b/internal/orchestrator/restore_access_control.go @@ -91,6 +91,13 @@ func maybeApplyAccessControlFromStage(ctx context.Context, logger *logging.Logge } func applyPBSAccessControlFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + done := logging.DebugStart(logger, "pbs access control apply", "stage=%s", stageRoot) defer func() { done(err) }() @@ -283,17 +290,17 @@ func applyPBSACLFromStage(logger *logging.Logger, stagedACL string) error { // - header-style (section + indented keys) // - colon-delimited line format (acl::::) if pbsConfigHasHeader(raw) { - return applyPBSACLSectionFormat(logger, raw) + return applyPBSACLSectionFormat(raw) } if isPBSACLLineFormat(raw) { - return applyPBSACLLineFormat(logger, raw) + return applyPBSACLLineFormat(raw) } logger.Warning("PBS access control: staged acl.cfg has unknown format; skipping apply") return nil } -func applyPBSACLSectionFormat(logger *logging.Logger, raw string) error { +func applyPBSACLSectionFormat(raw string) error { backupSections, err := 
parseProxmoxNotificationSections(raw) if err != nil { return fmt.Errorf("parse staged acl.cfg: %w", err) @@ -369,7 +376,7 @@ func parsePBSACLLine(line string) (pbsACLLine, bool) { }, true } -func applyPBSACLLineFormat(logger *logging.Logger, raw string) error { +func applyPBSACLLineFormat(raw string) error { var outLines []string var hasRootAdmin bool @@ -667,6 +674,13 @@ func mustMarshalRaw(v any) json.RawMessage { } func applyPVEAccessControlFromStage(ctx context.Context, logger *logging.Logger, stageRoot string) (err error) { + if ctx == nil { + ctx = context.Background() + } + if err := ctx.Err(); err != nil { + return err + } + done := logging.DebugStart(logger, "pve access control apply", "stage=%s", stageRoot) defer func() { done(err) }() diff --git a/internal/orchestrator/restore_access_control_ui.go b/internal/orchestrator/restore_access_control_ui.go index 2fecd71..7dee769 100644 --- a/internal/orchestrator/restore_access_control_ui.go +++ b/internal/orchestrator/restore_access_control_ui.go @@ -209,11 +209,11 @@ func maybeApplyPVEAccessControlFromClusterBackupWithUI( logger.Info("") message := fmt.Sprintf( - "Cluster backup detected.\n\n"+ - "Applying PVE access control will modify users/roles/groups/ACLs and secrets cluster-wide.\n\n"+ - "WARNING: This may lock you out or break API tokens/automation.\n\n"+ - "Safety rail: root@pam is preserved from the current system and kept Administrator on /.\n\n"+ - "Recommendation: do this from local console/IPMI, not over SSH.\n\n"+ + "Cluster backup detected.\n\n" + + "Applying PVE access control will modify users/roles/groups/ACLs and secrets cluster-wide.\n\n" + + "WARNING: This may lock you out or break API tokens/automation.\n\n" + + "Safety rail: root@pam is preserved from the current system and kept Administrator on /.\n\n" + + "Recommendation: do this from local console/IPMI, not over SSH.\n\n" + "Apply 1:1 PVE access control now?", ) applyNow, err := ui.ConfirmAction(ctx, "Apply PVE access control (cluster-wide)", message, "Apply 1:1 (expert)", "Skip apply", 90*time.Second, false) @@ -488,4 +488,3 @@ func buildAccessControlRollbackScript(markerPath, backupPath, logPath string) st ) return strings.Join(lines, "\n") + "\n" } - diff --git a/internal/orchestrator/restore_errors_test.go b/internal/orchestrator/restore_errors_test.go index d324e4c..cad33a8 100644 --- a/internal/orchestrator/restore_errors_test.go +++ b/internal/orchestrator/restore_errors_test.go @@ -1011,8 +1011,7 @@ func TestExtractHardlink_AbsoluteTargetRejectedError(t *testing.T) { Typeflag: tar.TypeLink, } - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - err := extractHardlink("/tmp/link", header, "/tmp", logger) + err := extractHardlink("/tmp/link", header, "/tmp") if err == nil || !strings.Contains(err.Error(), "absolute hardlink target not allowed") { t.Fatalf("expected absolute target error, got: %v", err) } @@ -1043,8 +1042,7 @@ func TestExtractHardlink_LinkCreationFails(t *testing.T) { } linkPath := filepath.Join(fakeFS.Root, "link") - logger := logging.New(logging.GetDefaultLogger().GetLevel(), false) - err := extractHardlink(linkPath, header, fakeFS.Root, logger) + err := extractHardlink(linkPath, header, fakeFS.Root) if err == nil || !strings.Contains(err.Error(), "hardlink") { t.Fatalf("expected link creation error, got: %v", err) } diff --git a/internal/orchestrator/restore_filesystem.go b/internal/orchestrator/restore_filesystem.go index bb27ce9..9c0132d 100644 --- a/internal/orchestrator/restore_filesystem.go +++ 
b/internal/orchestrator/restore_filesystem.go @@ -111,11 +111,11 @@ type lsblkReport struct { } type lsblkDevice struct { - Name string `json:"name"` - Path string `json:"path"` - UUID string `json:"uuid"` - PartUUID string `json:"partuuid"` - Label string `json:"label"` + Name string `json:"name"` + Path string `json:"path"` + UUID string `json:"uuid"` + PartUUID string `json:"partuuid"` + Label string `json:"label"` Children []lsblkDevice `json:"children"` } diff --git a/internal/orchestrator/restore_firewall.go b/internal/orchestrator/restore_firewall.go index 9e655c1..64c7419 100644 --- a/internal/orchestrator/restore_firewall.go +++ b/internal/orchestrator/restore_firewall.go @@ -233,7 +233,7 @@ func maybeApplyPVEFirewallWithUI( return nil } - if err := restartPVEFirewallService(ctx, logger); err != nil { + if err := restartPVEFirewallService(ctx); err != nil { logger.Warning("PVE firewall restore: reload/restart failed: %v", err) } @@ -300,7 +300,7 @@ func applyPVEFirewallFromStage(logger *logging.Logger, stageRoot string) (applie applied = append(applied, destFirewall) } } - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return applied, fmt.Errorf("stat staged firewall config %s: %w", stageFirewall, err) } @@ -382,7 +382,7 @@ func selectStageHostFirewall(logger *logging.Logger, stageRoot string) (path str return "", "", false, nil } -func restartPVEFirewallService(ctx context.Context, logger *logging.Logger) error { +func restartPVEFirewallService(ctx context.Context) error { timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() diff --git a/internal/orchestrator/restore_ha.go b/internal/orchestrator/restore_ha.go index 5341941..d69db66 100644 --- a/internal/orchestrator/restore_ha.go +++ b/internal/orchestrator/restore_ha.go @@ -278,7 +278,7 @@ func stageHasPVEHAConfig(stageRoot string) (bool, error) { for _, candidate := range candidates { if _, err := restoreFS.Stat(candidate); err == nil { return true, nil - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return false, fmt.Errorf("stat %s: %w", candidate, err) } } diff --git a/internal/orchestrator/restore_ha_test.go b/internal/orchestrator/restore_ha_test.go index 3622968..c0fae98 100644 --- a/internal/orchestrator/restore_ha_test.go +++ b/internal/orchestrator/restore_ha_test.go @@ -84,4 +84,3 @@ func TestApplyPVEHAFromStage_DoesNotPruneWhenStageMissing(t *testing.T) { t.Fatalf("unexpected resources.cfg content: %q", string(data)) } } - diff --git a/internal/orchestrator/restore_notifications.go b/internal/orchestrator/restore_notifications.go index 6181c09..3774a5d 100644 --- a/internal/orchestrator/restore_notifications.go +++ b/internal/orchestrator/restore_notifications.go @@ -56,7 +56,29 @@ func maybeApplyNotificationsFromStage(ctx context.Context, logger *logging.Logge if !plan.HasCategoryID("pbs_notifications") { return nil } - return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + behavior := plan.PBSRestoreBehavior + strict := behavior == PBSRestoreBehaviorClean + allowFileFallback := behavior == PBSRestoreBehaviorClean + + if err := ensurePBSServicesForAPI(ctx, logger); err != nil { + if allowFileFallback { + logger.Warning("PBS notifications API apply unavailable; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + logger.Warning("PBS notifications API apply unavailable; skipping apply (merge mode): %v", err) + 
return nil + } + + if err := applyPBSNotificationsViaAPI(ctx, logger, stageRoot, strict); err != nil { + if allowFileFallback { + logger.Warning("PBS notifications API apply failed; falling back to file-based apply: %v", err) + return applyPBSNotificationsFromStage(ctx, logger, stageRoot) + } + logger.Warning("PBS notifications API apply failed; skipping apply (merge mode): %v", err) + return nil + } + logger.Info("PBS notifications applied via API (%s)", behavior.DisplayName()) + return nil case SystemTypePVE: if !plan.HasCategoryID("pve_notifications") { return nil diff --git a/internal/orchestrator/restore_plan.go b/internal/orchestrator/restore_plan.go index 3c88564..6d54716 100644 --- a/internal/orchestrator/restore_plan.go +++ b/internal/orchestrator/restore_plan.go @@ -13,6 +13,7 @@ type RestorePlan struct { NormalCategories []Category StagedCategories []Category ExportCategories []Category + PBSRestoreBehavior PBSRestoreBehavior ClusterBackup bool ClusterSafeMode bool NeedsClusterRestore bool @@ -80,4 +81,3 @@ func (p *RestorePlan) HasCategoryID(id string) bool { } return hasCategoryID(p.NormalCategories, id) || hasCategoryID(p.StagedCategories, id) || hasCategoryID(p.ExportCategories, id) } - diff --git a/internal/orchestrator/restore_sdn.go b/internal/orchestrator/restore_sdn.go index 5cd81bf..01c9c59 100644 --- a/internal/orchestrator/restore_sdn.go +++ b/internal/orchestrator/restore_sdn.go @@ -12,6 +12,8 @@ import ( ) func maybeApplyPVESDNFromStage(ctx context.Context, logger *logging.Logger, plan *RestorePlan, stageRoot string, dryRun bool) (err error) { + _ = ctx // reserved for future timeouts/cancellation hooks + if plan == nil || plan.SystemType != SystemTypePVE || !plan.HasCategoryID("pve_sdn") { return nil } @@ -94,7 +96,7 @@ func applyPVESDNFromStage(logger *logging.Logger, stageRoot string) (applied []s applied = append(applied, destSDN) } } - } else if err != nil && !errors.Is(err, os.ErrNotExist) { + } else if !errors.Is(err, os.ErrNotExist) { return applied, fmt.Errorf("stat staged sdn %s: %w", stageSDN, err) } diff --git a/internal/orchestrator/restore_sdn_test.go b/internal/orchestrator/restore_sdn_test.go index 3a288d9..ea72096 100644 --- a/internal/orchestrator/restore_sdn_test.go +++ b/internal/orchestrator/restore_sdn_test.go @@ -80,4 +80,3 @@ func TestApplyPVESDNFromStage_NoStageData_NoChanges(t *testing.T) { t.Fatalf("expected no applied paths, got=%v", applied) } } - diff --git a/internal/orchestrator/restore_test.go b/internal/orchestrator/restore_test.go index c9d008d..80a1f10 100644 --- a/internal/orchestrator/restore_test.go +++ b/internal/orchestrator/restore_test.go @@ -512,28 +512,26 @@ func TestExtractDirectory_Success(t *testing.T) { // -------------------------------------------------------------------------- func TestExtractHardlink_AbsoluteTargetRejected(t *testing.T) { - logger := logging.New(types.LogLevelDebug, false) header := &tar.Header{ Name: "link", Linkname: "/absolute/path", Typeflag: tar.TypeLink, } - err := extractHardlink("/tmp/dest", header, "/tmp/dest", logger) + err := extractHardlink("/tmp/dest", header, "/tmp/dest") if err == nil || !strings.Contains(err.Error(), "absolute hardlink target not allowed") { t.Fatalf("expected absolute target error, got: %v", err) } } func TestExtractHardlink_EscapesRoot(t *testing.T) { - logger := logging.New(types.LogLevelDebug, false) header := &tar.Header{ Name: "link", Linkname: "../../../etc/passwd", Typeflag: tar.TypeLink, } - err := extractHardlink("/tmp/dest/link", header, "/tmp/dest", 
logger) + err := extractHardlink("/tmp/dest/link", header, "/tmp/dest") if err == nil || !strings.Contains(err.Error(), "escapes root") { t.Fatalf("expected escape error, got: %v", err) } @@ -544,7 +542,6 @@ func TestExtractHardlink_Success(t *testing.T) { t.Cleanup(func() { restoreFS = orig }) restoreFS = osFS{} - logger := logging.New(types.LogLevelDebug, false) destRoot := t.TempDir() originalFile := filepath.Join(destRoot, "original.txt") linkFile := filepath.Join(destRoot, "link.txt") @@ -559,7 +556,7 @@ func TestExtractHardlink_Success(t *testing.T) { Typeflag: tar.TypeLink, } - if err := extractHardlink(linkFile, header, destRoot, logger); err != nil { + if err := extractHardlink(linkFile, header, destRoot); err != nil { t.Fatalf("extractHardlink failed: %v", err) } @@ -1002,10 +999,7 @@ func TestReadVMName_FileNotFound(t *testing.T) { // -------------------------------------------------------------------------- func TestDetectNodeForVM_ReturnsHostname(t *testing.T) { - entry := vmEntry{ - Path: "/export/etc/pve/nodes/node1/qemu-server/100.conf", - } - node := detectNodeForVM(entry) + node := detectNodeForVM() // detectNodeForVM returns the current hostname, not the node from path if node == "" { t.Fatalf("expected non-empty node from hostname") diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index be03115..8b41afd 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -143,6 +143,90 @@ func selectRestoreModeTUI(systemType SystemType, configPath, buildSig, backupSum return selected, nil } +func selectPBSRestoreBehaviorTUI(configPath, buildSig, backupSummary string) (PBSRestoreBehavior, error) { + app := newTUIApp() + var selected PBSRestoreBehavior + var aborted bool + + list := tview.NewList().ShowSecondaryText(true) + list.SetMainTextColor(tcell.ColorWhite). + SetSelectedTextColor(tcell.ColorWhite). + SetSelectedBackgroundColor(tui.ProxmoxOrange) + + list.AddItem( + "1) Merge (existing PBS)", + "Restore onto an already operational PBS. Avoids API-side deletions of existing PBS objects that are not in the backup.", + 0, + nil, + ) + list.AddItem( + "2) Clean 1:1 (fresh PBS install)", + "Restore onto a new, clean PBS installation. Tries to make PBS configuration match the backup (may remove objects not in the backup).", + 0, + nil, + ) + + list.SetSelectedFunc(func(index int, mainText, secondaryText string, shortcut rune) { + switch index { + case 0: + selected = PBSRestoreBehaviorMerge + case 1: + selected = PBSRestoreBehaviorClean + default: + selected = PBSRestoreBehaviorUnspecified + } + if selected != PBSRestoreBehaviorUnspecified { + app.Stop() + } + }) + list.SetDoneFunc(func() { + aborted = true + app.Stop() + }) + + form := components.NewForm(app) + listItem := components.NewListFormItem(list). + SetLabel("Select PBS restore behavior"). + SetFieldHeight(6) + form.Form.AddFormItem(listItem) + form.Form.SetFocus(0) + + form.SetOnCancel(func() { + aborted = true + }) + form.AddCancelButton("Cancel") + enableFormNavigation(form, nil) + + // Selected backup summary + summaryText := strings.TrimSpace(backupSummary) + var summaryView tview.Primitive + if summaryText != "" { + summary := tview.NewTextView(). + SetText(fmt.Sprintf("Selected backup: %s", summaryText)). + SetWrap(true). + SetTextColor(tcell.ColorWhite) + summary.SetBorder(false) + summaryView = summary + } else { + summaryView = tview.NewBox() + } + + content := tview.NewFlex(). + SetDirection(tview.FlexRow). 
+ AddItem(summaryView, 2, 0, false). + AddItem(form.Form, 0, 1, true) + + page := buildRestoreWizardPage("PBS restore behavior", configPath, buildSig, content) + app.SetRoot(page, true).SetFocus(form.Form) + if err := app.Run(); err != nil { + return PBSRestoreBehaviorUnspecified, err + } + if aborted || selected == PBSRestoreBehaviorUnspecified { + return PBSRestoreBehaviorUnspecified, ErrRestoreAborted + } + return selected, nil +} + func filterAndSortCategoriesForSystem(available []Category, systemType SystemType) []Category { relevant := make([]Category, 0, len(available)) for _, cat := range available { @@ -328,7 +412,7 @@ func maybeRepairNICNamesTUI(ctx context.Context, logger *logging.Logger, archive return &nicRepairResult{AppliedAt: nowRestore(), SkippedReason: plan.SkippedReason} } - if plan != nil && !plan.Mapping.IsEmpty() { + if !plan.Mapping.IsEmpty() { logging.DebugStep(logger, "NIC repair", "Detect persistent NIC naming overrides (udev/systemd)") overrides, err := detectNICNamingOverrideRules(logger) if err != nil { @@ -756,35 +840,6 @@ func promptYesNoTUIWithCountdown(ctx context.Context, logger *logging.Logger, ti return result, nil } -func promptOkTUI(title, configPath, buildSig, message, okLabel string) error { - app := newTUIApp() - - infoText := tview.NewTextView(). - SetText(message). - SetWrap(true). - SetTextColor(tcell.ColorWhite). - SetDynamicColors(true) - - form := components.NewForm(app) - form.SetOnSubmit(func(values map[string]string) error { - return nil - }) - form.SetOnCancel(func() {}) - form.AddSubmitButton(okLabel) - form.AddCancelButton("Close") - enableFormNavigation(form, nil) - - content := tview.NewFlex(). - SetDirection(tview.FlexRow). - AddItem(infoText, 0, 1, false). - AddItem(form.Form, 3, 0, true) - - page := buildRestoreWizardPage(title, configPath, buildSig, content) - form.SetParentView(page) - - return app.SetRoot(page, true).SetFocus(form.Form).Run() -} - func promptNetworkCommitTUI(timeout time.Duration, health networkHealthReport, nicRepair *nicRepairResult, diagnosticsDir, configPath, buildSig string) (bool, error) { app := newTUIApp() var committed bool diff --git a/internal/orchestrator/restore_tui_simulation_test.go b/internal/orchestrator/restore_tui_simulation_test.go index ff1226f..2ec843c 100644 --- a/internal/orchestrator/restore_tui_simulation_test.go +++ b/internal/orchestrator/restore_tui_simulation_test.go @@ -145,4 +145,3 @@ func TestSelectCategoriesTUI_CancelReturnsAborted(t *testing.T) { t.Fatalf("err=%v; want %v", err, ErrRestoreAborted) } } - diff --git a/internal/orchestrator/restore_workflow_abort_test.go b/internal/orchestrator/restore_workflow_abort_test.go index 5a1fed1..9dc5d31 100644 --- a/internal/orchestrator/restore_workflow_abort_test.go +++ b/internal/orchestrator/restore_workflow_abort_test.go @@ -93,10 +93,10 @@ func TestRunRestoreWorkflow_FstabPromptInputAborted_AbortsWorkflow(t *testing.T) logger := logging.New(types.LogLevelError, false) cfg := &config.Config{BaseDir: "/base"} ui := &fakeRestoreWorkflowUI{ - mode: RestoreModeCustom, - categories: []Category{mustCategoryByID(t, "filesystem")}, - confirmRestore: true, - confirmFstabMerge: false, + mode: RestoreModeCustom, + categories: []Category{mustCategoryByID(t, "filesystem")}, + confirmRestore: true, + confirmFstabMerge: false, confirmFstabMergeErr: input.ErrInputAborted, } diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index 871ad9b..f90c1ad 100644 --- 
a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -129,6 +129,22 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l plan := PlanRestore(candidate.Manifest, selectedCategories, systemType, mode) + if plan.SystemType == SystemTypePBS && + (plan.HasCategoryID("pbs_host") || + plan.HasCategoryID("datastore_pbs") || + plan.HasCategoryID("pbs_remotes") || + plan.HasCategoryID("pbs_jobs") || + plan.HasCategoryID("pbs_notifications") || + plan.HasCategoryID("pbs_access_control") || + plan.HasCategoryID("pbs_tape")) { + behavior, err := ui.SelectPBSRestoreBehavior(ctx) + if err != nil { + return err + } + plan.PBSRestoreBehavior = behavior + logger.Info("PBS restore behavior: %s", behavior.DisplayName()) + } + clusterBackup := strings.EqualFold(strings.TrimSpace(candidate.Manifest.ClusterMode), "cluster") if plan.NeedsClusterRestore && clusterBackup { logger.Info("Backup marked as cluster node; enabling guarded restore options for pve_cluster") @@ -762,13 +778,14 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l logger.Info("") logger.Info("IMPORTANT: You may need to restart services for changes to take effect.") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: if needsClusterRestore && clusterServicesStopped { logger.Info(" PVE services were stopped/restarted during restore; verify status with: pvecm status") } else { logger.Info(" PVE services: systemctl restart pve-cluster pvedaemon pveproxy") } - } else if systemType == SystemTypePBS { + case SystemTypePBS: if pbsServicesStopped { logger.Info(" PBS services were stopped/restarted during restore; verify status with: systemctl status proxmox-backup proxmox-backup-proxy") } else { diff --git a/internal/orchestrator/restore_workflow_ui_helpers_test.go b/internal/orchestrator/restore_workflow_ui_helpers_test.go index dc03218..f2a0372 100644 --- a/internal/orchestrator/restore_workflow_ui_helpers_test.go +++ b/internal/orchestrator/restore_workflow_ui_helpers_test.go @@ -7,40 +7,46 @@ import ( ) type fakeRestoreWorkflowUI struct { - mode RestoreMode - categories []Category - confirmRestore bool - confirmCompatible bool - clusterMode ClusterRestoreMode - continueNoSafety bool + mode RestoreMode + categories []Category + pbsBehavior PBSRestoreBehavior + confirmRestore bool + confirmCompatible bool + clusterMode ClusterRestoreMode + continueNoSafety bool continuePBSServices bool - confirmFstabMerge bool - exportNode string - applyVMConfigs bool - applyStorageCfg bool - applyDatacenterCfg bool - confirmAction bool - networkCommit bool - - modeErr error - categoriesErr error - confirmRestoreErr error - confirmCompatibleErr error - clusterModeErr error - continueNoSafetyErr error + confirmFstabMerge bool + exportNode string + applyVMConfigs bool + applyStorageCfg bool + applyDatacenterCfg bool + confirmAction bool + networkCommit bool + + modeErr error + categoriesErr error + pbsBehaviorErr error + confirmRestoreErr error + confirmCompatibleErr error + clusterModeErr error + continueNoSafetyErr error continuePBSServicesErr error - confirmFstabMergeErr error - confirmActionErr error - repairNICNamesErr error - networkCommitErr error + confirmFstabMergeErr error + confirmActionErr error + repairNICNamesErr error + networkCommitErr error } func (f *fakeRestoreWorkflowUI) RunTask(ctx context.Context, title, initialMessage string, run func(ctx context.Context, report ProgressReporter) error) error { return run(ctx, 
nil) } -func (f *fakeRestoreWorkflowUI) ShowMessage(ctx context.Context, title, message string) error { return nil } -func (f *fakeRestoreWorkflowUI) ShowError(ctx context.Context, title, message string) error { return nil } +func (f *fakeRestoreWorkflowUI) ShowMessage(ctx context.Context, title, message string) error { + return nil +} +func (f *fakeRestoreWorkflowUI) ShowError(ctx context.Context, title, message string) error { + return nil +} func (f *fakeRestoreWorkflowUI) SelectBackupSource(ctx context.Context, options []decryptPathOption) (decryptPathOption, error) { return decryptPathOption{}, fmt.Errorf("unexpected SelectBackupSource call") @@ -62,7 +68,16 @@ func (f *fakeRestoreWorkflowUI) SelectCategories(ctx context.Context, available return f.categories, f.categoriesErr } -func (f *fakeRestoreWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { return nil } +func (f *fakeRestoreWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + if f.pbsBehavior == PBSRestoreBehaviorUnspecified && f.pbsBehaviorErr == nil { + return PBSRestoreBehaviorClean, nil + } + return f.pbsBehavior, f.pbsBehaviorErr +} + +func (f *fakeRestoreWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { + return nil +} func (f *fakeRestoreWorkflowUI) ConfirmRestore(ctx context.Context) (bool, error) { return f.confirmRestore, f.confirmRestoreErr @@ -115,4 +130,3 @@ func (f *fakeRestoreWorkflowUI) RepairNICNames(ctx context.Context, archivePath func (f *fakeRestoreWorkflowUI) PromptNetworkCommit(ctx context.Context, remaining time.Duration, health networkHealthReport, nicRepair *nicRepairResult, diagnosticsDir string) (bool, error) { return f.networkCommit, f.networkCommitErr } - diff --git a/internal/orchestrator/restore_workflow_ui_tfa_test.go b/internal/orchestrator/restore_workflow_ui_tfa_test.go index 647de55..cc9d298 100644 --- a/internal/orchestrator/restore_workflow_ui_tfa_test.go +++ b/internal/orchestrator/restore_workflow_ui_tfa_test.go @@ -49,4 +49,3 @@ func TestMaybeAddRecommendedCategoriesForTFA_DoesNotAddWhenDeclined(t *testing.T t.Fatalf("expected no categories to be added, got=%v", got) } } - diff --git a/internal/orchestrator/selective.go b/internal/orchestrator/selective.go index 18f7583..4b05ea2 100644 --- a/internal/orchestrator/selective.go +++ b/internal/orchestrator/selective.go @@ -158,11 +158,12 @@ func ShowRestoreModeMenuWithReader(ctx context.Context, reader *bufio.Reader, lo fmt.Println("Select restore mode:") fmt.Println(" [1] FULL restore - Restore everything from backup") - if systemType == SystemTypePVE { + switch systemType { + case SystemTypePVE: fmt.Println(" [2] STORAGE only - PVE cluster + storage + jobs + mounts") - } else if systemType == SystemTypePBS { + case SystemTypePBS: fmt.Println(" [2] DATASTORE only - PBS datastore definitions + sync/verify/prune jobs + mounts") - } else { + default: fmt.Println(" [2] STORAGE/DATASTORE only - Storage or datastore configuration") } diff --git a/internal/orchestrator/tui_hooks.go b/internal/orchestrator/tui_hooks.go index fe2cedb..e347e42 100644 --- a/internal/orchestrator/tui_hooks.go +++ b/internal/orchestrator/tui_hooks.go @@ -4,4 +4,3 @@ import "github.com/tis24dev/proxsave/internal/tui" // newTUIApp is an injection point for tests. Production uses tui.NewApp. 
var newTUIApp = tui.NewApp - diff --git a/internal/orchestrator/tui_simulation_test.go b/internal/orchestrator/tui_simulation_test.go index 7f52aef..27dd3d0 100644 --- a/internal/orchestrator/tui_simulation_test.go +++ b/internal/orchestrator/tui_simulation_test.go @@ -30,19 +30,19 @@ func withSimAppSequence(t *testing.T, keys []simKey) { app.SetScreen(screen) go func() { - // Wait for app.Run() to start event processing. - time.Sleep(50 * time.Millisecond) - for _, k := range keys { - mod := k.Mod - if mod == 0 { - mod = tcell.ModNone + // Wait for app.Run() to start event processing. + time.Sleep(50 * time.Millisecond) + for _, k := range keys { + mod := k.Mod + if mod == 0 { + mod = tcell.ModNone + } + screen.InjectKey(k.Key, k.R, mod) + time.Sleep(10 * time.Millisecond) } - screen.InjectKey(k.Key, k.R, mod) - time.Sleep(10 * time.Millisecond) - } - }() - return app -} + }() + return app + } t.Cleanup(func() { newTUIApp = orig diff --git a/internal/orchestrator/workflow_ui.go b/internal/orchestrator/workflow_ui.go index e951c5c..03db940 100644 --- a/internal/orchestrator/workflow_ui.go +++ b/internal/orchestrator/workflow_ui.go @@ -55,6 +55,7 @@ type RestoreWorkflowUI interface { PromptDecryptSecret(ctx context.Context, displayName, previousError string) (string, error) SelectRestoreMode(ctx context.Context, systemType SystemType) (RestoreMode, error) SelectCategories(ctx context.Context, available []Category, systemType SystemType) ([]Category, error) + SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error ConfirmRestore(ctx context.Context) (bool, error) diff --git a/internal/orchestrator/workflow_ui_cli.go b/internal/orchestrator/workflow_ui_cli.go index 940c3b1..1d303c7 100644 --- a/internal/orchestrator/workflow_ui_cli.go +++ b/internal/orchestrator/workflow_ui_cli.go @@ -184,6 +184,32 @@ func (u *cliWorkflowUI) SelectCategories(ctx context.Context, available []Catego return ShowCategorySelectionMenuWithReader(ctx, u.reader, u.logger, available, systemType) } +func (u *cliWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + fmt.Println() + fmt.Println("PBS restore reconciliation:") + fmt.Println(" [1] Merge (existing PBS) - Restore onto an already operational PBS (avoids API-side deletions of existing PBS objects not in the backup).") + fmt.Println(" [2] Clean 1:1 (fresh PBS install) - Restore onto a new, clean PBS and try to make configuration match the backup (may remove existing PBS objects not in the backup).") + fmt.Println(" [0] Exit") + + for { + fmt.Print("Choice: ") + line, err := input.ReadLineWithContext(ctx, u.reader) + if err != nil { + return PBSRestoreBehaviorUnspecified, err + } + switch strings.TrimSpace(line) { + case "1": + return PBSRestoreBehaviorMerge, nil + case "2": + return PBSRestoreBehaviorClean, nil + case "0": + return PBSRestoreBehaviorUnspecified, ErrRestoreAborted + default: + fmt.Println("Please enter 1, 2 or 0.") + } + } +} + func (u *cliWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { ShowRestorePlan(u.logger, config) return nil diff --git a/internal/orchestrator/workflow_ui_tui_restore.go b/internal/orchestrator/workflow_ui_tui_restore.go index 7c53681..350bd82 100644 --- a/internal/orchestrator/workflow_ui_tui_restore.go +++ b/internal/orchestrator/workflow_ui_tui_restore.go @@ -21,6 +21,10 @@ func (u *tuiWorkflowUI) SelectCategories(ctx context.Context, available []Catego 
return selectCategoriesTUI(available, systemType, u.configPath, u.buildSig) } +func (u *tuiWorkflowUI) SelectPBSRestoreBehavior(ctx context.Context) (PBSRestoreBehavior, error) { + return selectPBSRestoreBehaviorTUI(u.configPath, u.buildSig, strings.TrimSpace(u.selectedBackupSummary)) +} + func (u *tuiWorkflowUI) ShowRestorePlan(ctx context.Context, config *SelectiveRestoreConfig) error { return showRestorePlanTUI(config, u.configPath, u.buildSig) } @@ -147,4 +151,3 @@ func (u *tuiWorkflowUI) ConfirmApplyDatacenterCfg(ctx context.Context, datacente message := fmt.Sprintf("Datacenter configuration found:\n\n%s\n\nApply datacenter.cfg via pvesh now?", strings.TrimSpace(datacenterCfgPath)) return promptYesNoTUIFunc("Apply datacenter.cfg", u.configPath, u.buildSig, message, "Apply via API", "Skip") } - diff --git a/internal/pbs/namespaces.go b/internal/pbs/namespaces.go index 92bf869..345ac56 100644 --- a/internal/pbs/namespaces.go +++ b/internal/pbs/namespaces.go @@ -2,14 +2,18 @@ package pbs import ( "bytes" + "context" "encoding/json" + "errors" "fmt" - "os" "os/exec" "path/filepath" + "time" + + "github.com/tis24dev/proxsave/internal/safefs" ) -var execCommand = exec.Command +var execCommand = exec.CommandContext // Namespace represents a single PBS namespace. type Namespace struct { @@ -26,12 +30,15 @@ type listNamespacesResponse struct { // ListNamespaces tries the PBS CLI first and, if it fails, // falls back to the filesystem to infer namespaces. -func ListNamespaces(datastoreName, datastorePath string) ([]Namespace, bool, error) { - if namespaces, err := listNamespacesViaCLI(datastoreName); err == nil { +func ListNamespaces(ctx context.Context, datastoreName, datastorePath string, ioTimeout time.Duration) ([]Namespace, bool, error) { + if namespaces, err := listNamespacesViaCLI(ctx, datastoreName); err == nil { return namespaces, false, nil } + if err := ctx.Err(); err != nil { + return nil, false, err + } - namespaces, err := discoverNamespacesFromFilesystem(datastorePath) + namespaces, err := discoverNamespacesFromFilesystem(ctx, datastorePath, ioTimeout) if err != nil { return nil, false, err } @@ -39,8 +46,13 @@ func ListNamespaces(datastoreName, datastorePath string) ([]Namespace, bool, err return namespaces, true, nil } -func listNamespacesViaCLI(datastore string) ([]Namespace, error) { +func listNamespacesViaCLI(ctx context.Context, datastore string) ([]Namespace, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + cmd := execCommand( + ctx, "proxmox-backup-manager", "datastore", "namespace", @@ -65,12 +77,12 @@ func listNamespacesViaCLI(datastore string) ([]Namespace, error) { return parsed.Data, nil } -func discoverNamespacesFromFilesystem(datastorePath string) ([]Namespace, error) { +func discoverNamespacesFromFilesystem(ctx context.Context, datastorePath string, ioTimeout time.Duration) ([]Namespace, error) { if datastorePath == "" { return nil, fmt.Errorf("datastore path is empty") } - entries, err := os.ReadDir(datastorePath) + entries, err := safefs.ReadDir(ctx, datastorePath, ioTimeout) if err != nil { return nil, fmt.Errorf("cannot read datastore path %s: %w", datastorePath, err) } @@ -91,12 +103,14 @@ func discoverNamespacesFromFilesystem(datastorePath string) ([]Namespace, error) subPath := filepath.Join(datastorePath, entry.Name()) for _, chk := range checkDirs { - if _, err := os.Stat(filepath.Join(subPath, chk)); err == nil { + if _, err := safefs.Stat(ctx, filepath.Join(subPath, chk), ioTimeout); err == nil { namespaces = append(namespaces, 
Namespace{ Ns: entry.Name(), Path: subPath, }) break + } else if errors.Is(err, safefs.ErrTimeout) { + return nil, err } } } diff --git a/internal/pbs/namespaces_test.go b/internal/pbs/namespaces_test.go index 494525d..ac358b9 100644 --- a/internal/pbs/namespaces_test.go +++ b/internal/pbs/namespaces_test.go @@ -1,6 +1,7 @@ package pbs import ( + "context" "encoding/json" "fmt" "os" @@ -65,7 +66,7 @@ func TestDiscoverNamespacesFromFilesystem_DetectsSupportedDirs(t *testing.T) { mustMkdirAll(t, filepath.Join(tmpDir, "host-ns", "host")) mustMkdirAll(t, filepath.Join(tmpDir, "nested-ns", "namespace")) - namespaces, err := discoverNamespacesFromFilesystem(tmpDir) + namespaces, err := discoverNamespacesFromFilesystem(context.Background(), tmpDir, 0) if err != nil { t.Fatalf("discover failed: %v", err) } @@ -104,7 +105,7 @@ func TestDiscoverNamespacesFromFilesystem_IgnoresNonDirectories(t *testing.T) { mustWriteFile(t, filepath.Join(tmpDir, "some-file.txt"), []byte("ignore me")) mustMkdirAll(t, filepath.Join(tmpDir, "valid-ns", "vm")) - namespaces, err := discoverNamespacesFromFilesystem(tmpDir) + namespaces, err := discoverNamespacesFromFilesystem(context.Background(), tmpDir, 0) if err != nil { t.Fatalf("discover failed: %v", err) } @@ -119,12 +120,12 @@ func TestDiscoverNamespacesFromFilesystem_IgnoresNonDirectories(t *testing.T) { } func TestDiscoverNamespacesFromFilesystem_Errors(t *testing.T) { - if _, err := discoverNamespacesFromFilesystem(""); err == nil || !strings.Contains(err.Error(), "datastore path is empty") { + if _, err := discoverNamespacesFromFilesystem(context.Background(), "", 0); err == nil || !strings.Contains(err.Error(), "datastore path is empty") { t.Fatalf("expected error for empty path, got %v", err) } missing := filepath.Join(t.TempDir(), "missing") - if _, err := discoverNamespacesFromFilesystem(missing); err == nil || !strings.Contains(err.Error(), "cannot read datastore path") { + if _, err := discoverNamespacesFromFilesystem(context.Background(), missing, 0); err == nil || !strings.Contains(err.Error(), "cannot read datastore path") { t.Fatalf("expected error for missing path, got %v", err) } } @@ -132,7 +133,7 @@ func TestDiscoverNamespacesFromFilesystem_Errors(t *testing.T) { func TestListNamespaces_CLISuccess(t *testing.T) { setExecCommandStub(t, "cli-success") - namespaces, usedFallback, err := ListNamespaces("dummy", t.TempDir()) + namespaces, usedFallback, err := ListNamespaces(context.Background(), "dummy", t.TempDir(), 0) if err != nil { t.Fatalf("ListNamespaces failed: %v", err) } @@ -155,7 +156,7 @@ func TestListNamespaces_CLIFallback(t *testing.T) { tmpDir := t.TempDir() mustMkdirAll(t, filepath.Join(tmpDir, "local", "vm")) - namespaces, usedFallback, err := ListNamespaces("dummy", tmpDir) + namespaces, usedFallback, err := ListNamespaces(context.Background(), "dummy", tmpDir, 0) if err != nil { t.Fatalf("ListNamespaces failed: %v", err) } @@ -171,7 +172,7 @@ func TestListNamespaces_CLIFallback(t *testing.T) { func TestListNamespacesViaCLI_ErrorIncludesStderr(t *testing.T) { setExecCommandStub(t, "cli-error") - if _, err := listNamespacesViaCLI("dummy"); err == nil || !strings.Contains(err.Error(), "stderr: CLI exploded") { + if _, err := listNamespacesViaCLI(context.Background(), "dummy"); err == nil || !strings.Contains(err.Error(), "stderr: CLI exploded") { t.Fatalf("expected stderr text in error, got %v", err) } } @@ -197,7 +198,7 @@ func TestHelperProcess(t *testing.T) { func setExecCommandStub(t *testing.T, scenario string) { t.Helper() original 
:= execCommand - execCommand = func(string, ...string) *exec.Cmd { + execCommand = func(context.Context, string, ...string) *exec.Cmd { cmd := exec.Command(os.Args[0], "-test.run=TestHelperProcess", "--") cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1", diff --git a/internal/safefs/safefs.go b/internal/safefs/safefs.go new file mode 100644 index 0000000..36b001a --- /dev/null +++ b/internal/safefs/safefs.go @@ -0,0 +1,155 @@ +package safefs + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "syscall" + "time" +) + +var ( + osStat = os.Stat + osReadDir = os.ReadDir + syscallStatfs = syscall.Statfs +) + +// ErrTimeout is a sentinel error used to classify filesystem operations that did not +// complete within the configured timeout. +var ErrTimeout = errors.New("filesystem operation timed out") + +// TimeoutError is returned when a filesystem operation exceeds its allowed duration. +// Note that this does not cancel the underlying kernel call; it only stops waiting. +type TimeoutError struct { + Op string + Path string + Timeout time.Duration +} + +func (e *TimeoutError) Error() string { + if e == nil { + return "filesystem operation timed out" + } + if e.Timeout > 0 { + return fmt.Sprintf("%s %s: timeout after %s", e.Op, e.Path, e.Timeout) + } + return fmt.Sprintf("%s %s: timeout", e.Op, e.Path) +} + +func (e *TimeoutError) Unwrap() error { return ErrTimeout } + +func effectiveTimeout(ctx context.Context, timeout time.Duration) time.Duration { + if timeout <= 0 { + return 0 + } + if deadline, ok := ctx.Deadline(); ok { + remaining := time.Until(deadline) + if remaining <= 0 { + return 0 + } + if remaining < timeout { + return remaining + } + } + return timeout +} + +func Stat(ctx context.Context, path string, timeout time.Duration) (fs.FileInfo, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + return osStat(path) + } + + type result struct { + info fs.FileInfo + err error + } + ch := make(chan result, 1) + go func() { + info, err := osStat(path) + ch <- result{info: info, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.info, r.err + case <-ctx.Done(): + return nil, ctx.Err() + case <-timer.C: + return nil, &TimeoutError{Op: "stat", Path: path, Timeout: timeout} + } +} + +func ReadDir(ctx context.Context, path string, timeout time.Duration) ([]os.DirEntry, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + return osReadDir(path) + } + + type result struct { + entries []os.DirEntry + err error + } + ch := make(chan result, 1) + go func() { + entries, err := osReadDir(path) + ch <- result{entries: entries, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.entries, r.err + case <-ctx.Done(): + return nil, ctx.Err() + case <-timer.C: + return nil, &TimeoutError{Op: "readdir", Path: path, Timeout: timeout} + } +} + +func Statfs(ctx context.Context, path string, timeout time.Duration) (syscall.Statfs_t, error) { + if err := ctx.Err(); err != nil { + return syscall.Statfs_t{}, err + } + timeout = effectiveTimeout(ctx, timeout) + if timeout <= 0 { + var stat syscall.Statfs_t + return stat, syscallStatfs(path, &stat) + } + + type result struct { + stat syscall.Statfs_t + err error + } + ch := make(chan result, 1) + go func() { + var stat syscall.Statfs_t + err := syscallStatfs(path, &stat) 
+ ch <- result{stat: stat, err: err} + }() + + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case r := <-ch: + return r.stat, r.err + case <-ctx.Done(): + return syscall.Statfs_t{}, ctx.Err() + case <-timer.C: + return syscall.Statfs_t{}, &TimeoutError{Op: "statfs", Path: path, Timeout: timeout} + } +} diff --git a/internal/safefs/safefs_test.go b/internal/safefs/safefs_test.go new file mode 100644 index 0000000..30646ae --- /dev/null +++ b/internal/safefs/safefs_test.go @@ -0,0 +1,74 @@ +package safefs + +import ( + "context" + "errors" + "os" + "syscall" + "testing" + "time" +) + +func TestStat_ReturnsTimeoutError(t *testing.T) { + prev := osStat + defer func() { osStat = prev }() + + osStat = func(string) (os.FileInfo, error) { + select {} + } + + start := time.Now() + _, err := Stat(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("Stat err = %v; want timeout", err) + } + if time.Since(start) > 250*time.Millisecond { + t.Fatalf("Stat took too long: %s", time.Since(start)) + } +} + +func TestReadDir_ReturnsTimeoutError(t *testing.T) { + prev := osReadDir + defer func() { osReadDir = prev }() + + osReadDir = func(string) ([]os.DirEntry, error) { + select {} + } + + start := time.Now() + _, err := ReadDir(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("ReadDir err = %v; want timeout", err) + } + if time.Since(start) > 250*time.Millisecond { + t.Fatalf("ReadDir took too long: %s", time.Since(start)) + } +} + +func TestStatfs_ReturnsTimeoutError(t *testing.T) { + prev := syscallStatfs + defer func() { syscallStatfs = prev }() + + syscallStatfs = func(string, *syscall.Statfs_t) error { + select {} + } + + start := time.Now() + _, err := Statfs(context.Background(), "/does/not/matter", 25*time.Millisecond) + if err == nil || !errors.Is(err, ErrTimeout) { + t.Fatalf("Statfs err = %v; want timeout", err) + } + if time.Since(start) > 250*time.Millisecond { + t.Fatalf("Statfs took too long: %s", time.Since(start)) + } +} + +func TestStat_PropagatesContextCancellation(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := Stat(ctx, "/does/not/matter", 50*time.Millisecond) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Stat err = %v; want context.Canceled", err) + } +} diff --git a/internal/security/security.go b/internal/security/security.go index 00eea88..9edd76a 100644 --- a/internal/security/security.go +++ b/internal/security/security.go @@ -232,31 +232,31 @@ func (c *Checker) buildDependencyList() []dependencyEntry { } emailMethod := strings.ToLower(strings.TrimSpace(c.cfg.EmailDeliveryMethod)) - if emailMethod == "" { - emailMethod = "relay" - } - if emailMethod == "pmf" { - deps = append(deps, c.binaryDependency( - "proxmox-mail-forward", - []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, - true, - "email delivery method set to pmf (Proxmox Notifications via proxmox-mail-forward)", - )) - } else if emailMethod == "sendmail" { - deps = append(deps, c.binaryDependency( - "sendmail", - []string{"/usr/sbin/sendmail", "sendmail"}, - true, - "email delivery method set to sendmail (/usr/sbin/sendmail)", - )) - } else if emailMethod == "relay" && c.cfg.EmailFallbackSendmail { - deps = append(deps, c.binaryDependency( - "proxmox-mail-forward", - []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", 
"proxmox-mail-forward"}, - false, - "email relay fallback to pmf enabled (uses proxmox-mail-forward)", - )) - } + if emailMethod == "" { + emailMethod = "relay" + } + if emailMethod == "pmf" { + deps = append(deps, c.binaryDependency( + "proxmox-mail-forward", + []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, + true, + "email delivery method set to pmf (Proxmox Notifications via proxmox-mail-forward)", + )) + } else if emailMethod == "sendmail" { + deps = append(deps, c.binaryDependency( + "sendmail", + []string{"/usr/sbin/sendmail", "sendmail"}, + true, + "email delivery method set to sendmail (/usr/sbin/sendmail)", + )) + } else if emailMethod == "relay" && c.cfg.EmailFallbackSendmail { + deps = append(deps, c.binaryDependency( + "proxmox-mail-forward", + []string{"/usr/libexec/proxmox-mail-forward", "/usr/bin/proxmox-mail-forward", "proxmox-mail-forward"}, + false, + "email relay fallback to pmf enabled (uses proxmox-mail-forward)", + )) + } if c.cfg.BackupCephConfig { deps = append(deps, c.binaryDependency("ceph", []string{"ceph"}, false, "Ceph configuration collection enabled")) diff --git a/internal/security/security_test.go b/internal/security/security_test.go index 6f98f4c..09998d0 100644 --- a/internal/security/security_test.go +++ b/internal/security/security_test.go @@ -143,30 +143,30 @@ func TestCheckDependenciesMissingRequiredAddsError(t *testing.T) { } } - func TestCheckDependenciesMissingOptionalAddsWarning(t *testing.T) { - cfg := &config.Config{ - CompressionType: types.CompressionNone, // only tar required - EmailDeliveryMethod: "relay", - EmailFallbackSendmail: true, // pmf becomes optional dependency (relay fallback) - } - checker := newCheckerForTest(cfg, stubLookPath(map[string]bool{ - "tar": true, // present - // proxmox-mail-forward missing -> warning - })) +func TestCheckDependenciesMissingOptionalAddsWarning(t *testing.T) { + cfg := &config.Config{ + CompressionType: types.CompressionNone, // only tar required + EmailDeliveryMethod: "relay", + EmailFallbackSendmail: true, // pmf becomes optional dependency (relay fallback) + } + checker := newCheckerForTest(cfg, stubLookPath(map[string]bool{ + "tar": true, // present + // proxmox-mail-forward missing -> warning + })) checker.checkDependencies() if got := checker.result.WarningCount(); got != 1 { t.Fatalf("expected 1 warning, got %d issues=%+v", got, checker.result.Issues) } - msg := checker.result.Issues[0].Message - if !strings.Contains(msg, "Optional dependency") || !strings.Contains(msg, "proxmox-mail-forward") { - t.Fatalf("unexpected warning message: %s", msg) - } - if checker.result.ErrorCount() != 0 { - t.Fatalf("expected no errors, got %d", checker.result.ErrorCount()) - } + msg := checker.result.Issues[0].Message + if !strings.Contains(msg, "Optional dependency") || !strings.Contains(msg, "proxmox-mail-forward") { + t.Fatalf("unexpected warning message: %s", msg) } + if checker.result.ErrorCount() != 0 { + t.Fatalf("expected no errors, got %d", checker.result.ErrorCount()) + } +} func TestParseSSLineProgramExtraction(t *testing.T) { line := `tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=1234,fd=3))` diff --git a/internal/support/support_test.go b/internal/support/support_test.go index 107d1fc..f8d303e 100644 --- a/internal/support/support_test.go +++ b/internal/support/support_test.go @@ -25,9 +25,9 @@ type fakeNotifier struct { err error } -func (f *fakeNotifier) Name() string { return "fake-email" } -func (f *fakeNotifier) IsEnabled() 
diff --git a/internal/support/support_test.go b/internal/support/support_test.go
index 107d1fc..f8d303e 100644
--- a/internal/support/support_test.go
+++ b/internal/support/support_test.go
@@ -25,9 +25,9 @@ type fakeNotifier struct {
     err     error
 }
 
-func (f *fakeNotifier) Name() string { return "fake-email" }
-func (f *fakeNotifier) IsEnabled() bool { return f.enabled }
-func (f *fakeNotifier) IsCritical() bool { return false }
+func (f *fakeNotifier) Name() string     { return "fake-email" }
+func (f *fakeNotifier) IsEnabled() bool  { return f.enabled }
+func (f *fakeNotifier) IsCritical() bool { return false }
 
 func (f *fakeNotifier) Send(ctx context.Context, data *notify.NotificationData) (*notify.NotificationResult, error) {
     f.sent++
     f.last = data
@@ -84,9 +84,9 @@ func TestRunIntro_DeclinedConsent(t *testing.T) {
 
 func TestRunIntro_FullFlowWithRetries(t *testing.T) {
     withStdinFile(t, strings.Join([]string{
-        "y",    // accept
-        "y",    // has issue
-        "",     // empty nickname -> retry
+        "y",     // accept
+        "y",     // has issue
+        "",      // empty nickname -> retry
         "user",  // nickname
         "abc",   // invalid issue (missing #)
         "#no",   // invalid issue (non-numeric)