From e6291b9163d0dc0a19e31d47b75f700aa06c437a Mon Sep 17 00:00:00 2001
From: Matee Ullah Malik
Date: Wed, 31 Dec 2025 04:01:53 +0500
Subject: [PATCH 1/5] fix storage calculation in metrics

---
 supernode/supernode_metrics/metrics_collection.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/supernode/supernode_metrics/metrics_collection.go b/supernode/supernode_metrics/metrics_collection.go
index f5b795c6..dd3b0c41 100644
--- a/supernode/supernode_metrics/metrics_collection.go
+++ b/supernode/supernode_metrics/metrics_collection.go
@@ -3,6 +3,7 @@ package supernode_metrics
 import (
 	"context"
 	"fmt"
+	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -51,9 +52,10 @@ func (hm *Collector) collectMetrics(ctx context.Context) (sntypes.SupernodeMetri
 		storage := statusResp.Resources.StorageVolumes[0] // 9–11: first volume is reported

 		const bytesToGB = 1024.0 * 1024.0 * 1024.0
-		metrics.DiskTotalGb = float64(storage.TotalBytes) / bytesToGB // 9: disk_total_gb
-		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb
-		metrics.DiskUsagePercent = storage.UsagePercent // 10: disk_usage_percent
+		const diskTotalAdjustFactor = 1.034
+		metrics.DiskTotalGb = math.Floor((float64(storage.TotalBytes) / bytesToGB) * diskTotalAdjustFactor) // 9: disk_total_gb
+		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb
+		metrics.DiskUsagePercent = storage.UsagePercent // 10: disk_usage_percent

 		if metrics.DiskUsagePercent == 0 && storage.TotalBytes > 0 {
 			used := storage.TotalBytes - storage.AvailableBytes

From 5eef05dc28e82bc8e3eddc76e5abc73e5a59d6a1 Mon Sep 17 00:00:00 2001
From: Matee Ullah Malik
Date: Wed, 31 Dec 2025 05:30:10 +0500
Subject: [PATCH 2/5] comment

---
 supernode/supernode_metrics/metrics_collection.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/supernode/supernode_metrics/metrics_collection.go b/supernode/supernode_metrics/metrics_collection.go
index dd3b0c41..55b207ce 100644
--- a/supernode/supernode_metrics/metrics_collection.go
+++ b/supernode/supernode_metrics/metrics_collection.go
@@ -52,6 +52,7 @@ func (hm *Collector) collectMetrics(ctx context.Context) (sntypes.SupernodeMetri
 		storage := statusResp.Resources.StorageVolumes[0] // 9–11: first volume is reported

 		const bytesToGB = 1024.0 * 1024.0 * 1024.0
+		// Compensates for observed differences between reported and actual disk size.
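+		// Illustrative arithmetic (an assumption about where the gap comes from, not
+		// stated by this change): dividing by bytesToGB yields binary GiB, while drives
+		// are marketed in decimal GB, so a 1 TB (1e12-byte) disk reports about
+		// 1e12 / 1024^3 = 931.3 here; the full GiB-to-GB ratio is roughly 1.0737, so a
+		// factor such as 1.034 narrows that gap rather than closing it.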
 		const diskTotalAdjustFactor = 1.034
 		metrics.DiskTotalGb = math.Floor((float64(storage.TotalBytes) / bytesToGB) * diskTotalAdjustFactor) // 9: disk_total_gb
 		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb

From 845cca0d5496bfd0f2e9e92174770d2a2426f92e Mon Sep 17 00:00:00 2001
From: Matee ullah Malik <46045452+mateeullahmalik@users.noreply.github.com>
Date: Wed, 31 Dec 2025 06:53:35 +0500
Subject: [PATCH 3/5] Return diagnostics snapshot on first stats

---
 p2p/p2p_stats.go | 63 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/p2p/p2p_stats.go b/p2p/p2p_stats.go
index 49e1bcaa..f406404b 100644
--- a/p2p/p2p_stats.go
+++ b/p2p/p2p_stats.go
@@ -114,6 +114,14 @@ func (m *p2pStatsManager) Stats(ctx context.Context, p *p2p) (*StatsSnapshot, er
 	}

 	prev := m.getSnapshot()
+	if prev == nil {
+		next, _ := m.refreshDiagnosticsSync(ctx, p)
+		if next != nil {
+			prev = next
+		} else {
+			prev = m.getSnapshot()
+		}
+	}
 	snap := cloneSnapshot(prev)
 	snap.PeersCount = peersCount
 	// Store a separate struct instance in the cache to avoid aliasing with the returned snapshot,
@@ -128,6 +136,45 @@ func (m *p2pStatsManager) Stats(ctx context.Context, p *p2p) (*StatsSnapshot, er
 	return snap, nil
 }

+func (m *p2pStatsManager) refreshDiagnosticsSync(ctx context.Context, p *p2p) (*StatsSnapshot, error) {
+	if m == nil || p == nil {
+		return nil, nil
+	}
+	if !m.refreshInFlight.CompareAndSwap(false, true) {
+		return nil, nil
+	}
+
+	defer m.refreshInFlight.Store(false)
+	start := time.Now()
+	refreshCtx, cancel := context.WithTimeout(context.Background(), p2pStatsRefreshTimeout)
+	next, err := m.collectDiagnostics(refreshCtx, p, m.getSnapshot())
+	cancel()
+	dur := time.Since(start)
+
+	if next != nil {
+		m.setSnapshot(next)
+		m.markFresh()
+	}
+
+	if err != nil {
+		logtrace.Warn(ctx, "p2p stats diagnostics initial refresh failed", logtrace.Fields{
+			logtrace.FieldModule: "p2p",
+			"refresh": "diagnostics",
+			"ms": dur.Milliseconds(),
+			logtrace.FieldError: err.Error(),
+		})
+	}
+	if dur > p2pStatsSlowRefreshThreshold {
+		logtrace.Warn(ctx, "p2p stats diagnostics initial refresh slow", logtrace.Fields{
+			logtrace.FieldModule: "p2p",
+			"refresh": "diagnostics",
+			"ms": dur.Milliseconds(),
+		})
+	}
+
+	return next, err
+}
+
 func (m *p2pStatsManager) maybeRefreshDiagnostics(ctx context.Context, p *p2p) {
 	if m == nil || p == nil {
 		return
@@ -142,10 +189,15 @@ func (m *p2pStatsManager) maybeRefreshDiagnostics(ctx context.Context, p *p2p) {

 		start := time.Now()
 		refreshCtx, cancel := context.WithTimeout(context.Background(), p2pStatsRefreshTimeout)
-		err := m.refreshDiagnostics(refreshCtx, p)
+		next, err := m.collectDiagnostics(refreshCtx, p, m.getSnapshot())
 		cancel()
 		dur := time.Since(start)

+		if next != nil {
+			m.setSnapshot(next)
+			m.markFresh()
+		}
+
 		if err != nil {
 			logtrace.Warn(logCtx, "p2p stats diagnostics refresh failed", logtrace.Fields{
 				logtrace.FieldModule: "p2p",
@@ -164,12 +216,11 @@ func (m *p2pStatsManager) maybeRefreshDiagnostics(ctx context.Context, p *p2p) {
 	}()
 }

-func (m *p2pStatsManager) refreshDiagnostics(ctx context.Context, p *p2p) error {
+func (m *p2pStatsManager) collectDiagnostics(ctx context.Context, p *p2p, prev *StatsSnapshot) (*StatsSnapshot, error) {
 	if err := ctx.Err(); err != nil {
-		return err
+		return nil, err
 	}

-	prev := m.getSnapshot()
 	next := cloneSnapshot(prev)

 	var refreshErr error
@@ -203,9 +254,7 @@ func (m *p2pStatsManager) maybeRefreshDiagnostics(ctx context.Context, p *p2p) {
 		}
 	}

-	m.setSnapshot(next)
-	m.markFresh()
-	return refreshErr
+	return next, refreshErr
 }

 func cloneSnapshot(in *StatsSnapshot) *StatsSnapshot {

From 09874bf63f8f9db856c6107aee1219b54f341309 Mon Sep 17 00:00:00 2001
From: Matee Ullah Malik
Date: Wed, 31 Dec 2025 11:44:19 +0500
Subject: [PATCH 4/5] Five percent multiplier

---
 supernode/supernode_metrics/metrics_collection.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supernode/supernode_metrics/metrics_collection.go b/supernode/supernode_metrics/metrics_collection.go
index 55b207ce..47e80af5 100644
--- a/supernode/supernode_metrics/metrics_collection.go
+++ b/supernode/supernode_metrics/metrics_collection.go
@@ -53,7 +53,7 @@ func (hm *Collector) collectMetrics(ctx context.Context) (sntypes.SupernodeMetri

 		const bytesToGB = 1024.0 * 1024.0 * 1024.0
 		// Compensates for observed differences between reported and actual disk size.
-		const diskTotalAdjustFactor = 1.034
+		const diskTotalAdjustFactor = 1.05 // 5%
 		metrics.DiskTotalGb = math.Floor((float64(storage.TotalBytes) / bytesToGB) * diskTotalAdjustFactor) // 9: disk_total_gb
 		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb
 		metrics.DiskUsagePercent = storage.UsagePercent // 10: disk_usage_percent

From 8e1f14bb158eac4ca704991c4fdb14d0f2baffac Mon Sep 17 00:00:00 2001
From: Matee Ullah Malik
Date: Wed, 31 Dec 2025 13:12:37 +0500
Subject: [PATCH 5/5] Adjust the disk calculation multiplier

---
 supernode/status/metrics.go                       | 38 ++++++++++++++++++-
 supernode/status/service.go                       |  7 +++-
 .../supernode_metrics/metrics_collection.go       | 12 +++---
 3 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/supernode/status/metrics.go b/supernode/status/metrics.go
index ff29d100..3263ded3 100644
--- a/supernode/status/metrics.go
+++ b/supernode/status/metrics.go
@@ -2,6 +2,7 @@ package status

 import (
 	"context"
+	"math"
 	"time"

 	"github.com/LumeraProtocol/supernode/v2/pkg/logtrace"
@@ -10,6 +11,21 @@ import (
 	"github.com/shirou/gopsutil/v3/disk"
 	"github.com/shirou/gopsutil/v3/mem"
 )

+// diskSizeAdjustFactor compensates for observed discrepancies between the disk
+// size reported by the node runtime and the "expected" decimal-GB figure used by
+// external consumers (dashboards/on-chain metrics).
+//
+// Rationale:
+//   - Keep the adjustment in exactly one place (the status metrics source) so all
+//     downstream consumers remain consistent.
+//   - Apply it to both total and free to preserve internal consistency between
+//     total/free/usage%.
+const diskSizeAdjustFactor = 1.1
+
+func adjustDiskBytes(value uint64) uint64 {
+	return uint64(math.Round(float64(value) * diskSizeAdjustFactor))
+}
+
 // MetricsCollector handles system resource monitoring
 type MetricsCollector struct{}
@@ -60,6 +76,9 @@ func (m *MetricsCollector) CollectStorageMetrics(ctx context.Context, paths []st
 	if len(paths) == 0 {
 		paths = []string{"/"}
 	}
+	// Note: callers may request multiple paths, but higher-level services report
+	// only the first volume to keep node metrics stable and comparable across
+	// environments (host vs container overlays, multiple mount points, etc.).
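+	// Illustrative example (editorial; numbers chosen for clarity): adjustDiskBytes
+	// below scales total and available by the same factor, so the derived usage
+	// percent is unchanged, e.g. total 100 and free 40 (60% used) become 110 and 44,
+	// with used 66 and usage still 60%; only the absolute GB figures grow.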
 	var storageInfos []StorageInfo
 	for _, path := range paths {
 		usage, err := disk.Usage(path)
@@ -67,7 +86,24 @@ func (m *MetricsCollector) CollectStorageMetrics(ctx context.Context, paths []st
 			logtrace.Error(ctx, "failed to get storage info", logtrace.Fields{logtrace.FieldError: err.Error(), "path": path})
 			continue
 		}
-		storageInfos = append(storageInfos, StorageInfo{Path: path, TotalBytes: usage.Total, UsedBytes: usage.Used, AvailableBytes: usage.Free, UsagePercent: usage.UsedPercent})
+		totalBytes := adjustDiskBytes(usage.Total)
+		availableBytes := adjustDiskBytes(usage.Free)
+		if availableBytes > totalBytes {
+			availableBytes = totalBytes
+		}
+		usedBytes := totalBytes - availableBytes
+		usagePercent := 0.0
+		if totalBytes > 0 {
+			usagePercent = float64(usedBytes) / float64(totalBytes) * 100
+		}
+
+		storageInfos = append(storageInfos, StorageInfo{
+			Path: path,
+			TotalBytes: totalBytes,
+			UsedBytes: usedBytes,
+			AvailableBytes: availableBytes,
+			UsagePercent: usagePercent,
+		})
 	}
 	return storageInfos
 }
diff --git a/supernode/status/service.go b/supernode/status/service.go
index 33faef1d..f7dab3ae 100644
--- a/supernode/status/service.go
+++ b/supernode/status/service.go
@@ -84,7 +84,12 @@ func (s *SupernodeStatusService) GetStatus(ctx context.Context, includeP2PMetric
 		resp.Resources.HardwareSummary = fmt.Sprintf("%d cores / %.0fGB RAM", cores, resp.Resources.Memory.TotalGb)
 	}
 	// Storage metrics
-	for _, si := range s.metrics.CollectStorageMetrics(ctx, s.storagePaths) {
+	if storageInfos := s.metrics.CollectStorageMetrics(ctx, s.storagePaths); len(storageInfos) > 0 {
+		// Rationale: report only the first volume everywhere (status + on-chain
+		// metrics) to avoid ambiguity across environments where multiple mounts
+		// exist (e.g. container overlay + host filesystem). The configured default
+		// is "/" so this remains stable.
+		si := storageInfos[0]
 		resp.Resources.StorageVolumes = append(resp.Resources.StorageVolumes, &pb.StatusResponse_Resources_Storage{
 			Path: si.Path,
 			TotalBytes: si.TotalBytes,
diff --git a/supernode/supernode_metrics/metrics_collection.go b/supernode/supernode_metrics/metrics_collection.go
index 47e80af5..254780ec 100644
--- a/supernode/supernode_metrics/metrics_collection.go
+++ b/supernode/supernode_metrics/metrics_collection.go
@@ -3,7 +3,6 @@ package supernode_metrics
 import (
 	"context"
 	"fmt"
-	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -49,14 +48,15 @@ func (hm *Collector) collectMetrics(ctx context.Context) (sntypes.SupernodeMetri
 	}

 	if statusResp.Resources != nil && len(statusResp.Resources.StorageVolumes) > 0 {
+		// Storage is sourced from the status service. Any disk-size adjustment
+		// must happen there (single source of truth) so status + on-chain metrics
+		// stay consistent.
 		storage := statusResp.Resources.StorageVolumes[0] // 9–11: first volume is reported

 		const bytesToGB = 1024.0 * 1024.0 * 1024.0
-		// Compensates for observed differences between reported and actual disk size.
-		const diskTotalAdjustFactor = 1.05 // 5%
-		metrics.DiskTotalGb = math.Floor((float64(storage.TotalBytes) / bytesToGB) * diskTotalAdjustFactor) // 9: disk_total_gb
-		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb
-		metrics.DiskUsagePercent = storage.UsagePercent // 10: disk_usage_percent
+		metrics.DiskTotalGb = float64(storage.TotalBytes) / bytesToGB // 9: disk_total_gb
+		metrics.DiskFreeGb = float64(storage.AvailableBytes) / bytesToGB // 11: disk_free_gb
+		metrics.DiskUsagePercent = storage.UsagePercent // 10: disk_usage_percent

 		if metrics.DiskUsagePercent == 0 && storage.TotalBytes > 0 {
 			used := storage.TotalBytes - storage.AvailableBytes