From bb976c18a5d2d55437d967c6767070ba8c73b9da Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Thu, 12 Feb 2026 15:12:43 +0400 Subject: [PATCH 1/2] Fix NUMA imbalance to mean uneven GPU distribution across nodes --- primus/tools/preflight/gpu/gpu_topology.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/primus/tools/preflight/gpu/gpu_topology.py b/primus/tools/preflight/gpu/gpu_topology.py index 168121fa1..e483e1d32 100644 --- a/primus/tools/preflight/gpu/gpu_topology.py +++ b/primus/tools/preflight/gpu/gpu_topology.py @@ -109,7 +109,10 @@ def run_gpu_standard_checks(*, force_topology: bool = False) -> Dict[str, Any]: findings.append(Finding("warn", "NUMA mapping unavailable (amd-smi not found); skipped", {})) else: nodes = [x.get("numa_node") for x in numa.get("gpus", []) if x.get("numa_node") is not None] - imbalance = len(set(nodes)) > 1 if nodes else False + imbalance = False + if nodes: + counts = [nodes.count(n) for n in set(nodes)] + imbalance = len(set(counts)) > 1 findings.append( Finding("info", "GPU↔NUMA mapping", {"mapping": numa.get("gpus", []), "imbalance": imbalance}) ) From fe1dce6c8434362bbb50a4245ae1c4975acbfb18 Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Thu, 12 Feb 2026 15:35:32 +0400 Subject: [PATCH 2/2] replied to comments --- primus/tools/preflight/gpu/gpu_topology.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/primus/tools/preflight/gpu/gpu_topology.py b/primus/tools/preflight/gpu/gpu_topology.py index e483e1d32..e86198764 100644 --- a/primus/tools/preflight/gpu/gpu_topology.py +++ b/primus/tools/preflight/gpu/gpu_topology.py @@ -7,6 +7,7 @@ from __future__ import annotations import os +from collections import Counter from typing import Any, Dict, List, Optional from .gpu_probe import probe_gpus @@ -111,7 +112,7 @@ def run_gpu_standard_checks(*, force_topology: bool = False) -> Dict[str, Any]: nodes = [x.get("numa_node") for x in numa.get("gpus", []) if x.get("numa_node") is not None] imbalance = False if nodes: - counts = [nodes.count(n) for n in set(nodes)] + counts = Counter(nodes).values() imbalance = len(set(counts)) > 1 findings.append( Finding("info", "GPU↔NUMA mapping", {"mapping": numa.get("gpus", []), "imbalance": imbalance})