Merged
109 changes: 107 additions & 2 deletions cmd/proxsave/main.go
@@ -603,6 +603,15 @@
}
}()

// Defer for network rollback countdown (LIFO: executes BEFORE footer)
defer func() {
if finalExitCode == exitCodeInterrupted {
if abortInfo := orchestrator.GetLastRestoreAbortInfo(); abortInfo != nil {
printNetworkRollbackCountdown(abortInfo)
}
}
}()

defer func() {
if !args.Support || pendingSupportStats == nil {
return
@@ -746,7 +755,7 @@
logging.Info("Restore mode enabled - starting CLI workflow...")
if err := orchestrator.RunRestoreWorkflow(ctx, cfg, logger, toolVersion); err != nil {
if errors.Is(err, orchestrator.ErrRestoreAborted) {
logging.Info("Restore workflow aborted by user")
logging.Warning("Restore workflow aborted by user")
if args.Support {
pendingSupportStats = support.BuildSupportStats(logger, resolveHostname(), envInfo.Type, envInfo.Version, toolVersion, startTime, time.Now(), exitCodeInterrupted, "restore")
}
@@ -776,7 +785,7 @@
}
if err := orchestrator.RunRestoreWorkflowTUI(ctx, cfg, logger, toolVersion, args.ConfigPath, sig); err != nil {
if errors.Is(err, orchestrator.ErrRestoreAborted) || errors.Is(err, orchestrator.ErrDecryptAborted) {
logging.Info("Restore workflow aborted by user")
logging.Warning("Restore workflow aborted by user")
if args.Support {
pendingSupportStats = support.BuildSupportStats(logger, resolveHostname(), envInfo.Type, envInfo.Version, toolVersion, startTime, time.Now(), exitCodeInterrupted, "restore")
}
@@ -1382,6 +1391,102 @@
return finalExitCode
}

const rollbackCountdownDisplayDuration = 10 * time.Second

func printNetworkRollbackCountdown(abortInfo *orchestrator.RestoreAbortInfo) {
if abortInfo == nil {
return
}

color := "\033[33m" // yellow
colorReset := "\033[0m"

markerExists := false
if strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "" {
if _, err := os.Stat(strings.TrimSpace(abortInfo.NetworkRollbackMarker)); err == nil {
markerExists = true
}
}

status := "UNKNOWN"
switch {
case markerExists:
status = "ARMED (will execute automatically)"
case !abortInfo.RollbackDeadline.IsZero() && time.Now().After(abortInfo.RollbackDeadline):
status = "EXECUTED (marker removed)"
case strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "":
status = "DISARMED/CLEARED (marker removed before deadline)"
case abortInfo.NetworkRollbackArmed:
status = "ARMED (status from snapshot)"
default:
status = "NOT ARMED"
}

fmt.Println()
fmt.Printf("%s===========================================\n", color)
fmt.Printf("NETWORK ROLLBACK%s\n", colorReset)
fmt.Println()

// Static info
fmt.Printf(" Status: %s\n", status)
if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" {
fmt.Printf(" Pre-apply IP (from snapshot): %s\n", strings.TrimSpace(abortInfo.OriginalIP))
}
if strings.TrimSpace(abortInfo.CurrentIP) != "" && abortInfo.CurrentIP != "unknown" {
fmt.Printf(" Post-apply IP (observed): %s\n", strings.TrimSpace(abortInfo.CurrentIP))
}
if strings.TrimSpace(abortInfo.NetworkRollbackLog) != "" {
fmt.Printf(" Rollback log: %s\n", strings.TrimSpace(abortInfo.NetworkRollbackLog))
}
fmt.Println()

switch {
case markerExists && !abortInfo.RollbackDeadline.IsZero() && time.Until(abortInfo.RollbackDeadline) > 0:
fmt.Println("Connection will be temporarily interrupted during restore.")
if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" {
fmt.Printf("Remember to reconnect using the pre-apply IP: %s\n", strings.TrimSpace(abortInfo.OriginalIP))
}
case !markerExists && !abortInfo.RollbackDeadline.IsZero() && time.Now().After(abortInfo.RollbackDeadline):
if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" {
fmt.Printf("Rollback executed: reconnect using the pre-apply IP: %s\n", strings.TrimSpace(abortInfo.OriginalIP))
}
case !markerExists && strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "":
if strings.TrimSpace(abortInfo.CurrentIP) != "" && abortInfo.CurrentIP != "unknown" {
fmt.Printf("Rollback will NOT run: reconnect using the post-apply IP: %s\n", strings.TrimSpace(abortInfo.CurrentIP))
}
}

// Live countdown for max 10 seconds (only when rollback is still armed).
if !markerExists || abortInfo.RollbackDeadline.IsZero() {
fmt.Printf("%s===========================================%s\n", color, colorReset)
return
}

ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
displayEnd := time.Now().Add(rollbackCountdownDisplayDuration)

for {
remaining := time.Until(abortInfo.RollbackDeadline)
if remaining <= 0 {
fmt.Printf("\r Remaining: Rollback executing now... \n")
break
}
if time.Now().After(displayEnd) {
fmt.Printf("\r Remaining: %ds (exiting, rollback will proceed)\n", int(remaining.Seconds()))
break
}
fmt.Printf("\r Remaining: %ds ", int(remaining.Seconds()))

select {
Check failure on line 1481 in cmd/proxsave/main.go (GitHub Actions / security): should use a simple channel send/receive instead of select with a single case (S1000)
case <-ticker.C:
continue
}
}
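// Note: staticcheck S1000 (flagged above) would accept a plain "<-ticker.C"
// receive in place of the single-case select; behavior is identical either
// way, so this is a cosmetic follow-up.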

fmt.Printf("%s===========================================%s\n", color, colorReset)
}

func printFinalSummary(finalExitCode int) {
fmt.Println()

6 changes: 5 additions & 1 deletion docs/BACKUP_ENV_MAPPING.md
@@ -18,7 +18,6 @@ This file documents the mapping between variables from the **old Bash `backup.en
AUTO_UPDATE_HASHES = SAME
BACKUP_BLACKLIST = SAME
BACKUP_CEPH_CONFIG = SAME
BACKUP_CLUSTER_CONFIG = SAME
BACKUP_CRITICAL_FILES = SAME
BACKUP_INSTALLED_PACKAGES = SAME
BACKUP_PVE_BACKUP_FILES = SAME
@@ -120,6 +119,11 @@ CLOUD_BACKUP_PATH = SEMANTIC CHANGE → CLOUD_REMOTE_PATH ⚠️
**Go**: `CLOUD_REMOTE="GoogleDrive"` (remote name) + `CLOUD_REMOTE_PATH="/proxsave/backup"` (path within that remote)
**Migration**: remove the remote name from the bash variable and move the path into `CLOUD_REMOTE_PATH`

BACKUP_CLUSTER_CONFIG = SEMANTIC CHANGE ⚠️
**Bash/legacy behavior**: disabling it skipped only the cluster config files.
**Go (current)**: disabling it also disables cluster runtime collection (`pvecm status`, `pvecm nodes`, HA status).
**Impact**: if you previously disabled cluster file backup but still wanted runtime status collection, re-enable `BACKUP_CLUSTER_CONFIG` (or introduce a separate runtime flag if needed).
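
A minimal sketch of the current gating, assuming a single flag drives both collectors (the names and structure are illustrative, not the actual proxsave source):

```go
package main

import (
	"fmt"
	"os/exec"
)

// Hypothetical config struct; the real proxsave types may differ.
type config struct {
	BackupClusterConfig bool // BACKUP_CLUSTER_CONFIG
}

func capture(name string, args ...string) {
	out, err := exec.Command(name, args...).CombinedOutput()
	if err != nil {
		fmt.Printf("skipped %s %v: %v\n", name, args, err)
		return
	}
	fmt.Printf("captured %d bytes from %s %v\n", len(out), name, args)
}

func main() {
	cfg := config{BackupClusterConfig: true}

	// One flag now gates both the cluster config files and the runtime
	// status commands.
	if cfg.BackupClusterConfig {
		fmt.Println("collect /etc/pve/corosync.conf and /etc/corosync/")
		capture("pvecm", "status")
		capture("pvecm", "nodes")
		capture("ha-manager", "status")
	}
}
```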

STORAGE_WARNING_THRESHOLD_SECONDARY = SEMANTIC CHANGE → MIN_DISK_SPACE_SECONDARY_GB ⚠️
**Bash**: `STORAGE_WARNING_THRESHOLD_SECONDARY="90"` (90% used = warning)
**Go**: `MIN_DISK_SPACE_SECONDARY_GB="10"` (minimum 10GB free required)
33 changes: 29 additions & 4 deletions docs/CLOUD_STORAGE.md
@@ -339,7 +339,7 @@ This is sufficient to start! Other options use sensible defaults.
# Cloud storage
CLOUD_ENABLED=true
CLOUD_REMOTE=GoogleDrive
CLOUD_REMOTE_PATH=/proxsave/backup # Complete folder path inside the remote
CLOUD_REMOTE_PATH=/proxsave/backup # Folder path inside the remote
CLOUD_LOG_PATH=/proxsave/log # Optional: log folder inside the same remote

# Upload mode
@@ -377,13 +377,13 @@ RETENTION_YEARLY=3
|----------|---------|-------------|
| `CLOUD_ENABLED` | `false` | Enable cloud storage |
| `CLOUD_REMOTE` | _(required)_ | rclone remote **name** from `rclone config` (legacy `remote:path` still supported) |
| `CLOUD_REMOTE_PATH` | _(empty)_ | Full folder path/prefix inside the remote (e.g., `/proxsave/backup`) |
| `CLOUD_LOG_PATH` | _(empty)_ | Optional log folder on the same remote (set `remote:/path` only when using a different remote) |
| `CLOUD_REMOTE_PATH` | _(empty)_ | Folder path/prefix inside the remote (e.g., `/proxsave/backup`) |
| `CLOUD_LOG_PATH` | _(empty)_ | Optional log folder (recommended: path-only on the same remote; use `otherremote:/path` only when using a different remote) |
| `CLOUD_UPLOAD_MODE` | `parallel` | `parallel` or `sequential` |
| `CLOUD_PARALLEL_MAX_JOBS` | `2` | Max concurrent uploads (parallel mode) |
| `CLOUD_PARALLEL_VERIFICATION` | `true` | Verify checksums after upload |
| `CLOUD_WRITE_HEALTHCHECK` | `false` | Use write test for connectivity check |
| `RCLONE_TIMEOUT_CONNECTION` | `30` | Connection timeout (seconds) |
| `RCLONE_TIMEOUT_CONNECTION` | `30` | Connection timeout (seconds). Also used by restore/decrypt when scanning cloud backups (list + manifest read). |
| `RCLONE_TIMEOUT_OPERATION` | `300` | Operation timeout (seconds) |
| `RCLONE_BANDWIDTH_LIMIT` | _(empty)_ | Upload rate limit (e.g., `5M` = 5 MB/s) |
| `RCLONE_TRANSFERS` | `4` | Number of parallel transfers |
@@ -395,6 +395,29 @@

For complete configuration reference, see: **[Configuration Guide](CONFIGURATION.md)**

### Recommended Remote Path Formats (Important)

ProxSave supports both “new style” (path-only) and “legacy style” (`remote:path`) values, but using a consistent format avoids confusion.

**Recommended:**
- `CLOUD_REMOTE` should be just the **remote name** (no `:`), e.g. `nextcloud` or `GoogleDrive`.
- `CLOUD_REMOTE_PATH` should be a **path inside the remote** (no remote prefix). Use **no trailing slash**. A leading `/` is accepted.
- `CLOUD_LOG_PATH` should be a **folder path** for logs. When logs are stored on the **same remote**, prefer **path-only** here too (no remote prefix). Use `otherremote:/path` only if logs must go to a different remote than `CLOUD_REMOTE`.

**Examples (same remote):**
```bash
CLOUD_REMOTE=nextcloud-katerasrael
CLOUD_REMOTE_PATH=B+K/BACKUP/marcellus
CLOUD_LOG_PATH=B+K/BACKUP/marcellus/logs
```

**Examples (different remotes for backups vs logs):**
```bash
CLOUD_REMOTE=nextcloud-backups
CLOUD_REMOTE_PATH=proxsave/backup/host1
CLOUD_LOG_PATH=nextcloud-logs:proxsave/log/host1
```
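
For intuition, here is a rough sketch of how a remote name and a path can be combined into the `remote:path` string rclone expects, including the legacy style. The function is illustrative only and is not the actual proxsave implementation:

```go
package main

import (
	"fmt"
	"strings"
)

// joinRemotePath builds "remote:path" from CLOUD_REMOTE and CLOUD_REMOTE_PATH.
// Legacy values that already contain "remote:path" in CLOUD_REMOTE keep working.
func joinRemotePath(remote, path string) string {
	remote = strings.TrimSpace(remote)
	path = strings.Trim(strings.TrimSpace(path), "/")

	// Legacy style: CLOUD_REMOTE="GoogleDrive:proxsave/backup"
	if strings.Contains(remote, ":") {
		base := strings.TrimRight(remote, "/")
		if path == "" {
			return base
		}
		return base + "/" + path
	}

	// New style: remote name plus a separate path
	if path == "" {
		return remote + ":"
	}
	return remote + ":" + path
}

func main() {
	fmt.Println(joinRemotePath("nextcloud-katerasrael", "/B+K/BACKUP/marcellus/")) // nextcloud-katerasrael:B+K/BACKUP/marcellus
	fmt.Println(joinRemotePath("GoogleDrive:proxsave/backup", ""))                 // GoogleDrive:proxsave/backup
}
```

Both styles resolve to the same rclone target, which is why the tool treats them as equivalent.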

### Understanding CLOUD_REMOTE vs CLOUD_REMOTE_PATH

**How CLOUD_REMOTE and CLOUD_REMOTE_PATH work together**
Expand All @@ -414,6 +437,7 @@ path inside the remote, and uses that consistently for:
- **uploads** (cloud backend);
- **cloud retention**;
- **restore / decrypt menus** (entry “Cloud backups (rclone)”).
- Restore/decrypt cloud scanning is protected by `RCLONE_TIMEOUT_CONNECTION` (increase it on slow remotes or very large directories).

You can choose the style you prefer; they are equivalent from the tool’s point of view.

@@ -593,6 +617,7 @@ rm /tmp/test*.txt
| `couldn't find configuration section 'gdrive'` | Remote not configured | `rclone config` → create remote |
| `401 unauthorized` | Credentials expired | `rclone config reconnect gdrive` or regenerate keys |
| `connection timeout (30s)` | Slow network | Increase `RCLONE_TIMEOUT_CONNECTION=60` |
| `Timed out while scanning ... (rclone)` | Slow remote / huge directory | Increase `RCLONE_TIMEOUT_CONNECTION` and retry the restore/decrypt scan |
| `operation timeout (300s exceeded)` | Large file + slow network | Increase `RCLONE_TIMEOUT_OPERATION=900` |
| `429 Too Many Requests` | API rate limiting | Reduce `RCLONE_TRANSFERS=2`, increase `CLOUD_BATCH_PAUSE=3` |
| `directory not found` | Path doesn't exist | `rclone mkdir gdrive:pbs-backups` |
26 changes: 22 additions & 4 deletions docs/CONFIGURATION.md
@@ -425,7 +425,11 @@ CLOUD_ENABLED=false # true | false
CLOUD_REMOTE=GoogleDrive # remote name from `rclone config`
CLOUD_REMOTE_PATH=/proxsave/backup # folder path inside the remote

# Cloud log path (same remote, optional)
# Cloud log path (optional)
# Recommended (same remote): path-only (no remote prefix) and no trailing slash
# CLOUD_LOG_PATH=proxsave/log
# Legacy / different remote: explicit remote:path
# CLOUD_LOG_PATH=OtherRemote:proxsave/log
CLOUD_LOG_PATH=/proxsave/log # leave empty to disable log uploads

# Legacy compatibility (still supported):
@@ -446,6 +450,20 @@ CLOUD_PARALLEL_VERIFICATION=true # true | false
CLOUD_WRITE_HEALTHCHECK=false # true | false (auto-fallback mode vs force write test)
```

### Recommended Remote Path Formats (Cloud)

To avoid ambiguity, prefer consistent formats:
- `CLOUD_REMOTE`: remote **name** only (no `:`), e.g. `nextcloud` or `GoogleDrive`.
- `CLOUD_REMOTE_PATH`: path **inside** the remote (no remote prefix), **no trailing slash** (leading `/` is accepted).
- `CLOUD_LOG_PATH`: log **folder path**. When logs are on the same remote as backups, prefer **path-only** here too; use `otherremote:/path` only when logs must go to a different remote.

Example (same remote for backups + logs):
```bash
CLOUD_REMOTE=nextcloud-katerasrael
CLOUD_REMOTE_PATH=B+K/BACKUP/marcellus
CLOUD_LOG_PATH=B+K/BACKUP/marcellus/logs
```

### Connectivity Check Modes

| Setting | Mode | Behavior |
@@ -530,7 +548,7 @@ Quick comparison to help you choose the right storage configuration:

```bash
# Connection timeout (seconds)
RCLONE_TIMEOUT_CONNECTION=30 # Remote accessibility check
RCLONE_TIMEOUT_CONNECTION=30 # Remote accessibility check (also used for restore/decrypt cloud scan)

# Operation timeout (seconds)
RCLONE_TIMEOUT_OPERATION=300 # Upload/download operations (5 minutes default)
@@ -553,7 +571,7 @@ RCLONE_FLAGS="--checkers=4 --stats=0 --drive-use-trash=false --drive-pacer-min-s

### Timeout Tuning

- **CONNECTION**: Short timeout for quick accessibility check (default 30s)
- **CONNECTION**: Short timeout for quick accessibility check (default 30s); also caps restore/decrypt cloud scanning (listing backups + reading manifests, as illustrated in the sketch after this list)
- **OPERATION**: Long timeout for large file uploads (increase for slow networks)
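
As an illustration of how a connection timeout can bound a cloud scan, here is a minimal sketch using `context.WithTimeout` around an `rclone lsjson` call. It assumes rclone is installed and a remote named `GoogleDrive` exists; it is not the actual proxsave code:

```go
package main

import (
	"context"
	"fmt"
	"os/exec"
	"time"
)

func main() {
	connTimeout := 30 * time.Second // RCLONE_TIMEOUT_CONNECTION

	ctx, cancel := context.WithTimeout(context.Background(), connTimeout)
	defer cancel()

	// List backups on the remote; the context cancels the scan if it
	// exceeds the connection timeout (slow remote, huge directory).
	cmd := exec.CommandContext(ctx, "rclone", "lsjson", "GoogleDrive:proxsave/backup")
	out, err := cmd.Output()
	if ctx.Err() == context.DeadlineExceeded {
		fmt.Println("timed out while scanning the remote; raise RCLONE_TIMEOUT_CONNECTION")
		return
	}
	if err != nil {
		fmt.Println("rclone failed:", err)
		return
	}
	fmt.Printf("scan returned %d bytes of JSON\n", len(out))
}
```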

### Bandwidth Limit Format
@@ -868,7 +886,7 @@ METRICS_PATH=${BASE_DIR}/metrics # Empty = /var/lib/prometheus/node-exporter

```bash
# Cluster configuration
BACKUP_CLUSTER_CONFIG=true # /etc/pve/cluster files
BACKUP_CLUSTER_CONFIG=true # Cluster config + runtime (corosync, pvecm status/nodes, HA status)

# PVE firewall rules
BACKUP_PVE_FIREWALL=true # PVE firewall configuration
24 changes: 12 additions & 12 deletions docs/EXAMPLES.md
@@ -853,39 +853,39 @@ CLOUD_LOG_PATH=

## Example 9: Test in a Chroot/Fixture

**Scenario**: Esegui la raccolta su un root alternativo (chroot, snapshot montato, fixture di test) senza toccare il filesystem live.
**Scenario**: Run collection against an alternate system root (chroot, mounted snapshot, test fixture) without touching the live filesystem.

**Use case**:
- CI/test di backup in ambiente isolato
- Analisi offline di un'immagine/snapshot montata
- Esecuzione in container che monta un root diverso
- CI/test backups in an isolated environment
- Offline analysis of a mounted image/snapshot
- Running inside a container that mounts a different root

### Configuration

```bash
# configs/backup.env
SYSTEM_ROOT_PREFIX=/mnt/snapshot-root # punta al root alternativo
SYSTEM_ROOT_PREFIX=/mnt/snapshot-root # points to the alternate root
BACKUP_ENABLED=true
ENABLE_GO_BACKUP=true
# /etc, /var, /root, /home vengono risolti sotto il prefisso
# /etc, /var, /root, /home are resolved under the prefix
```
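
To see what "resolved under the prefix" means in practice, here is a minimal sketch of prefixing collection paths with `SYSTEM_ROOT_PREFIX` (illustrative only; the real proxsave resolution logic may differ):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// resolve joins a collection path onto SYSTEM_ROOT_PREFIX when it is set.
func resolve(prefix, path string) string {
	if prefix == "" {
		return path
	}
	return filepath.Join(prefix, path)
}

func main() {
	prefix := os.Getenv("SYSTEM_ROOT_PREFIX") // e.g. /mnt/snapshot-root

	for _, p := range []string{"/etc/pve", "/var/lib/pve-cluster", "/root", "/home"} {
		fmt.Println(resolve(prefix, p))
	}
	// With SYSTEM_ROOT_PREFIX=/mnt/snapshot-root this prints
	// /mnt/snapshot-root/etc/pve, /mnt/snapshot-root/var/lib/pve-cluster, ...
}
```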

### Setup Steps

```bash
# 1) Monta o prepara il root alternativo
mount /dev/vg0/snap /mnt/snapshot-root # esempio
# 1) Mount or prepare the alternate root
mount /dev/vg0/snap /mnt/snapshot-root # example

# 2) Esegui un dry-run
# 2) Run a dry-run
SYSTEM_ROOT_PREFIX=/mnt/snapshot-root ./build/proxsave --dry-run

# 3) Esegui il backup reale (opzionale)
# 3) Run the actual backup (optional)
SYSTEM_ROOT_PREFIX=/mnt/snapshot-root ./build/proxsave
```

### Expected Results
- I file raccolti riflettono il contenuto di `/mnt/snapshot-root/etc`, `/var`, `/root`, `/home`, ecc.
- Nessuna scrittura sul filesystem live del nodo.
- Collected files reflect the contents of `/mnt/snapshot-root/etc`, `/var`, `/root`, `/home`, etc.
- No writes to the node's live filesystem.

---
