Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions e2e/scenario_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,76 @@ func Test_ACL_DisableSSH(t *testing.T) {
})
}

// Test_ACL_GPUNC runs the ACL GPU bootstrap scenario on the Standard_NC6s_v3
// (NCsv3 / V100) VM size.
func Test_ACL_GPUNC(t *testing.T) {
	runScenarioACLGPU(t, "Standard_NC6s_v3")
}

// Test_ACL_GPUA100 runs the ACL GPU bootstrap scenario on the
// Standard_NC24ads_A100_v4 (A100) VM size.
func Test_ACL_GPUA100(t *testing.T) {
	runScenarioACLGPU(t, "Standard_NC24ads_A100_v4")
}

// Test_ACL_GPUA10 runs the ACL GRID scenario on the Standard_NV6ads_A10_v5
// (A10) VM size, which additionally validates GRID licensing.
func Test_ACL_GPUA10(t *testing.T) {
	runScenarioACLGRID(t, "Standard_NV6ads_A10_v5")
}

// runScenarioACLGPU runs the 'gpu' E2E scenario for an ACL VHD on the given
// VM size: it bootstraps a GPU-enabled node with the NVIDIA driver enabled and
// validates the driver stack (nvidia-modprobe installed, nvidia-persistenced
// running).
func runScenarioACLGPU(t *testing.T, vmSize string) {
	RunScenario(t, &Scenario{
		Description: fmt.Sprintf("Tests that a GPU-enabled node with VM size %s using an ACL VHD can be properly bootstrapped", vmSize),
		Tags: Tags{
			GPU: true,
		},
		Config: Config{
			Cluster: ClusterKubenet,
			VHD: config.VHDACLGen2TL,
			BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) {
				nbc.AgentPoolProfile.VMSize = vmSize
				nbc.ConfigGPUDriverIfNeeded = true
				nbc.EnableGPUDevicePluginIfNeeded = false
				nbc.EnableNvidia = true
			},
			VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) {
				vmss.SKU.Name = to.Ptr(vmSize)
				// The ACL VHD used here is a Trusted Launch image, so the VMSS
				// must be configured for trusted launch as well.
				vmss.Properties = addTrustedLaunchToVMSS(vmss.Properties)
			},
			Validator: func(ctx context.Context, s *Scenario) {
				// Confirm the NVIDIA driver artifacts landed on the node and the
				// persistence daemon is up.
				ValidateNvidiaModProbeInstalled(ctx, s)
				ValidateNvidiaPersistencedRunning(ctx, s)
			},
		},
	})
}

func runScenarioACLGRID(t *testing.T, vmSize string) {
RunScenario(t, &Scenario{
Description: fmt.Sprintf("Tests that a GPU-enabled node with VM size %s using an ACL VHD can be properly bootstrapped, and that the GRID license is valid", vmSize),
Tags: Tags{
GPU: true,
},
Config: Config{
Cluster: ClusterKubenet,
VHD: config.VHDACLGen2TL,
BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) {
nbc.AgentPoolProfile.VMSize = vmSize
nbc.ConfigGPUDriverIfNeeded = true
nbc.EnableGPUDevicePluginIfNeeded = false
nbc.EnableNvidia = true
},
VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) {
vmss.SKU.Name = to.Ptr(vmSize)
vmss.Properties = addTrustedLaunchToVMSS(vmss.Properties)
},
Validator: func(ctx context.Context, s *Scenario) {
// Ensure nvidia-modprobe install does not restart kubelet and temporarily cause node to be unschedulable
ValidateNvidiaModProbeInstalled(ctx, s)
ValidateNvidiaGRIDLicenseValid(ctx, s)
ValidateNvidiaPersistencedRunning(ctx, s)
},
},
})
}

func Test_AzureLinuxV3_SecureTLSBootstrapping_BootstrapToken_Fallback(t *testing.T) {
RunScenario(t, &Scenario{
Description: "Tests that a node using a AzureLinuxV3 Gen2 VHD can be properly bootstrapped even if secure TLS bootstrapping fails",
Expand Down Expand Up @@ -1838,6 +1908,35 @@ func Test_AzureLinuxV3_GPU(t *testing.T) {
})
}

// Test_AzureLinuxV3_GPUA10 bootstraps a GPU-enabled node with the A10
// (NVads_A10_v5) SKU on an AzureLinuxV3 (CgroupV2) VHD and validates the
// NVIDIA driver stack, GRID licensing, and that kubelet stays healthy.
func Test_AzureLinuxV3_GPUA10(t *testing.T) {
	// The A10 size is referenced in both the bootstrap config and the VMSS SKU.
	const vmSize = "Standard_NV6ads_A10_v5"
	RunScenario(t, &Scenario{
		Description: "Tests that a GPU-enabled node with A10 GPU SKU using a AzureLinuxV3 (CgroupV2) VHD can be properly bootstrapped",
		Tags: Tags{
			GPU: true,
		},
		Config: Config{
			Cluster: ClusterKubenet,
			VHD: config.VHDAzureLinuxV3Gen2,
			BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) {
				nbc.AgentPoolProfile.VMSize = vmSize
				nbc.ConfigGPUDriverIfNeeded = true
				nbc.EnableGPUDevicePluginIfNeeded = false
				nbc.EnableNvidia = true
			},
			VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) {
				vmss.SKU.Name = to.Ptr(vmSize)
			},
			Validator: func(ctx context.Context, s *Scenario) {
				ValidateNvidiaModProbeInstalled(ctx, s)
				ValidateNvidiaGRIDLicenseValid(ctx, s)
				ValidateKubeletHasNotStopped(ctx, s)
				ValidateServicesDoNotRestartKubelet(ctx, s)
				ValidateNvidiaPersistencedRunning(ctx, s)
			},
		},
	})
}

func Test_AzureLinuxV3_GPUAzureCNI(t *testing.T) {
RunScenario(t, &Scenario{
Description: "AzureLinux V3 (CgroupV2) gpu scenario on cluster configured with Azure CNI",
Expand Down
99 changes: 97 additions & 2 deletions parts/linux/cloud-init/artifacts/acl/cse_install_acl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,25 @@ downloadSysextFromVersion() {

matchLocalSysext() {
local seName=$1 desiredVer=$2 seArch=$3
printf "%s\n" "/opt/${seName}/downloads/${seName}-v${desiredVer}"[.~-]*"-${seArch}.raw" | sort -V | tail -n1
local downloadDir="/opt/${seName}/downloads"
# Try arch-specific versioned filename first (kubelet-style: name-vVER.X-arch.raw)
local match
match=$(find "${downloadDir}" -maxdepth 2 -name "${seName}-v${desiredVer}*-${seArch}.raw" -type f 2>/dev/null | sort -V | tail -n1)
if [ -f "${match}" ]; then
echo "${match}"
return
fi
# Fallback: GPU sysexts are downloaded as simple name.raw (e.g. nvidia-driver-vgpu.raw).
# MCR artifacts may place files in an arch subdirectory (e.g. amd64/name.raw),
# so search up to 2 levels deep.
match=$(find "${downloadDir}" -maxdepth 2 -name "${seName}.raw" -type f 2>/dev/null | head -n1)
Comment on lines +33 to +35
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

matchLocalSysext fallback (find ... -name "${seName}.raw" | head -n1) can return an arbitrary file and does not filter by the requested systemd arch (seArch is x86-64 on amd64). If both arch subdirs exist (e.g. amd64/ + arm64/), this can select the wrong sysext and break provisioning. Consider preferring an arch-specific path/pattern (e.g. ${downloadDir}/${seArch}/${seName}.raw or filtering find results), and make the selection deterministic (e.g. sort -V | tail -n1).

Suggested change
# MCR artifacts may place files in an arch subdirectory (e.g. amd64/name.raw),
# so search up to 2 levels deep.
match=$(find "${downloadDir}" -maxdepth 2 -name "${seName}.raw" -type f 2>/dev/null | head -n1)
# Prefer an arch-specific subdirectory (${downloadDir}/${seArch}) when present,
# then fall back to an arch-neutral file directly under ${downloadDir}. In both
# cases, pick the highest version deterministically.
match=$(find "${downloadDir}/${seArch}" -maxdepth 1 -name "${seName}.raw" -type f 2>/dev/null | sort -V | tail -n1)
if [ -f "${match}" ]; then
echo "${match}"
return
fi
match=$(find "${downloadDir}" -maxdepth 1 -name "${seName}.raw" -type f 2>/dev/null | sort -V | tail -n1)

Copilot uses AI. Check for mistakes.
echo "${match}"
}

matchRemoteSysext() {
local seURL=$1 desiredVer=$2 seArch=$3
retrycmd_silent 120 5 20 oras repo tags --registry-config "${ORAS_REGISTRY_CONFIG_FILE}" "${seURL}" | grep -Ex "v${desiredVer//./\\.}[.~-].*-azlinux3-${seArch}" | sort -V | tail -n1
# Match either arch-specific tags (v{ver}[.~-]*-azlinux3-{arch}) or exact version tags ({ver})
retrycmd_silent 120 5 20 oras repo tags --registry-config "${ORAS_REGISTRY_CONFIG_FILE}" "${seURL}" | grep -Ex "(v${desiredVer//./\\.}[.~-].*-azlinux3-${seArch}|${desiredVer//./\\.})" | sort -V | tail -n1
test ${PIPESTATUS[0]} -eq 0
Comment on lines +41 to 43
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The updated regex allows both arch-specific tags and “exact version” tags, but the single-pass sort -V | tail -n1 selection can become ambiguous if both forms exist (it may choose the wrong one depending on tag set/sort behavior). To make selection deterministic, consider doing a two-pass lookup: first attempt the arch-specific pattern; only if that yields no match, fallback to the exact-version tag.

Suggested change
# Match either arch-specific tags (v{ver}[.~-]*-azlinux3-{arch}) or exact version tags ({ver})
retrycmd_silent 120 5 20 oras repo tags --registry-config "${ORAS_REGISTRY_CONFIG_FILE}" "${seURL}" | grep -Ex "(v${desiredVer//./\\.}[.~-].*-azlinux3-${seArch}|${desiredVer//./\\.})" | sort -V | tail -n1
test ${PIPESTATUS[0]} -eq 0
local tags archPattern exactPattern match
# Fetch all tags once; retrycmd_silent handles retries and logging.
tags=$(retrycmd_silent 120 5 20 oras repo tags --registry-config "${ORAS_REGISTRY_CONFIG_FILE}" "${seURL}")
if [ $? -ne 0 ]; then
# Propagate failure from oras/registry access.
return 1
fi
# First pass: prefer arch-specific tags (v{ver}[.~-]*-azlinux3-{arch}).
archPattern="^v${desiredVer//./\\.}[.~-].*-azlinux3-${seArch}$"
match=$(printf '%s\n' "${tags}" | grep -E "${archPattern}" | sort -V | tail -n1)
if [ -n "${match}" ]; then
echo "${match}"
return 0
fi
# Second pass: fall back to exact-version tags ({ver}) if no arch-specific tag exists.
exactPattern="^${desiredVer//./\\.}$"
match=$(printf '%s\n' "${tags}" | grep -E "${exactPattern}" | sort -V | tail -n1)
echo "${match}"

Copilot uses AI. Check for mistakes.
}

Expand Down Expand Up @@ -100,6 +113,88 @@ installCredentialProviderPackageFromBootstrapProfileRegistry() {
installCredentialProviderFromPkg "$2" "$1"
}

# Resolves the sysext version tag from the OS image by reading VERSION_ID out
# of /etc/os-release. GPU sysexts are tagged by the OS image version, not the
# driver version. Prints the version on stdout; on failure, prints an error to
# stderr and returns ERR_SYSEXT_VERSION_ID_NOT_FOUND.
getACLVersionID() {
    local os_ver
    # Source os-release in a subshell so its variables don't leak into our scope.
    os_ver=$(. /etc/os-release && echo "${VERSION_ID}")
    if [ -n "${os_ver}" ]; then
        echo "${os_ver}"
        return 0
    fi
    echo "ERROR: VERSION_ID not found in /etc/os-release" >&2
    return "${ERR_SYSEXT_VERSION_ID_NOT_FOUND}"
}

# Pulls a GPU-related sysext by name from the ACL MCR registry and merges it.
# The registry path is keyed by major.minor (e.g. 3.0) while the tag is the
# full VERSION_ID (e.g. 3.0.20260304), for example:
#   mcr.microsoft.com/azurelinux/3.0/azure-container-linux/nvidia-driver-cuda:3.0.20260304
installACLGPUSysext() {
    local sysext_name="$1"
    local version_id
    if ! version_id=$(getACLVersionID); then
        exit $ERR_SYSEXT_VERSION_ID_NOT_FOUND
    fi
    # MCR_REPOSITORY_BASE may be overridden (e.g. for air-gapped clouds);
    # strip any trailing slash before composing the registry path.
    local mcr_base="${MCR_REPOSITORY_BASE:-mcr.microsoft.com}"
    local registry_base="${mcr_base%/}/azurelinux/${version_id%.*}/azure-container-linux"
    if ! mergeSysexts "${sysext_name}" "${registry_base}/${sysext_name}" "${version_id}"; then
        exit $ERR_ORAS_PULL_SYSEXT_FAIL
    fi
}

installGPUDriverSysext() {
# ACL NVIDIA GPU driver sysext registry paths:
# Registry path uses major.minor (e.g. 3.0), tag uses full VERSION_ID (e.g. 3.0.20260304).
#
# 1. NVIDIA proprietary driver:
# mcr.microsoft.com/azurelinux/3.0/azure-container-linux/nvidia-driver-cuda:${VERSION_ID}...
#
# 2. NVIDIA OpenRM driver:
# mcr.microsoft.com/azurelinux/3.0/azure-container-linux/nvidia-driver-cuda-open:${VERSION_ID}...
#
# 3. NVIDIA GRID (vGPU guest) driver for converged GPU sizes:
# mcr.microsoft.com/azurelinux/3.0/azure-container-linux/nvidia-driver-vgpu:${VERSION_ID}...
#
# NVIDIA_GPU_DRIVER_TYPE is set by AgentBaker based on ConvergedGPUDriverSizes map
# in gpu_components.go. Converged sizes get "grid"; all others get "cuda".
# Legacy GPUs (T4, V100) require proprietary CUDA drivers; A100+ use NVIDIA open drivers.
local vm_sku
vm_sku=$(get_compute_sku)
local sysext_name

# Converged GPU sizes (NVads_A10_v5, NCads_A10_v4) use GRID drivers
if [ "$NVIDIA_GPU_DRIVER_TYPE" = "grid" ]; then
echo "VM SKU ${vm_sku} uses NVIDIA GRID driver (converged)"
sysext_name="nvidia-driver-vgpu"
else
local driver_ret
should_use_nvidia_open_drivers
driver_ret=$?
if [ "$driver_ret" -eq 2 ]; then
echo "Failed to determine GPU driver type"
exit $ERR_MISSING_CUDA_PACKAGE
Comment on lines +170 to +171
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should_use_nvidia_open_drivers returns 2 specifically for “unable to determine VM SKU”, but this path exits with ERR_MISSING_CUDA_PACKAGE, which is misleading and can cause incorrect failure categorization/telemetry. Prefer propagating the function’s error (or add a dedicated error code like ERR_GPU_DRIVER_SELECTION_FAIL) and emit an error message that matches the underlying cause (e.g., IMDS SKU lookup failure).

Suggested change
echo "Failed to determine GPU driver type"
exit $ERR_MISSING_CUDA_PACKAGE
echo "Failed to determine GPU driver type for this VM: unable to determine VM SKU (should_use_nvidia_open_drivers returned ${driver_ret})"
exit "${driver_ret}"

Copilot uses AI. Check for mistakes.
elif [ "$driver_ret" -eq 0 ]; then
echo "VM SKU ${vm_sku} uses NVIDIA OpenRM driver (cuda-open)"
sysext_name="nvidia-driver-cuda-open"
else
echo "VM SKU ${vm_sku} uses NVIDIA proprietary driver (cuda)"
sysext_name="nvidia-driver-cuda"
fi
fi

installACLGPUSysext "${sysext_name}"

# Process tmpfiles.d rules shipped inside the GPU sysexts (e.g. symlink
# /etc/nvidia/gridd.conf -> /usr/share/nvidia/gridd.conf). The sysext
# overlay only covers /usr; files under /etc must be created on the
# writable root via tmpfiles.d rules.
systemd-tmpfiles --create
}

# Installs the NVIDIA container toolkit as a systemd sysext from the ACL MCR registry.
installNvidiaContainerToolkitSysext() {
    installACLGPUSysext nvidia-container-toolkit
}

# Installs the NVIDIA fabric manager as a systemd sysext from the ACL MCR registry.
installNvidiaFabricManagerSysext() {
    installACLGPUSysext nvidia-fabric-manager
}

# No-op on ACL: runc ships in the base image, so there is nothing to install.
# NOTE(review): `stub` appears to be a shared no-op helper — confirm in cse_helpers.sh.
ensureRunc() {
    stub
}
Expand Down
16 changes: 10 additions & 6 deletions parts/linux/cloud-init/artifacts/cse_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,10 @@ configGPUDrivers() {
downloadGPUDrivers
installNvidiaContainerToolkit
enableNvidiaPersistenceMode
elif isACL "$OS" "$OS_VARIANT"; then
installNvidiaContainerToolkitSysext
installGPUDriverSysext
enableNvidiaPersistenceMode
else
echo "os $OS $OS_VARIANT not supported at this time. skipping configGPUDrivers"
exit 1
Expand All @@ -952,16 +956,16 @@ configGPUDrivers() {
retrycmd_if_failure 120 5 300 nvidia-smi || exit $ERR_GPU_DRIVERS_START_FAIL
retrycmd_if_failure 120 5 25 ldconfig || exit $ERR_GPU_DRIVERS_START_FAIL

# Fix the NVIDIA /dev/char link issue (Mariner/AzureLinux only)
if isMarinerOrAzureLinux "$OS"; then
# GRID vGPU licensing: restart nvidia-gridd after device nodes exist
if [ "$NVIDIA_GPU_DRIVER_TYPE" = "grid" ]; then
systemctlEnableAndStart nvidia-gridd 30
fi

# Fix the NVIDIA /dev/char link issue
createNvidiaSymlinkToAllDeviceNodes
fi

# GRID vGPU licensing: start nvidia-gridd service to ensure license configuration
if (isMarinerOrAzureLinux "$OS" || isACL "$OS" "$OS_VARIANT") && [ "$NVIDIA_GPU_DRIVER_TYPE" = "grid" ]; then
systemctlEnableAndStart nvidia-gridd 300 || exit $ERR_SYSTEMCTL_START_FAIL
fi

retrycmd_if_failure 120 5 25 pkill -SIGHUP containerd || exit $ERR_GPU_DRIVERS_INSTALL_TIMEOUT

# NPD is installed as a VM extension, which might happen before/after/during CSE, so this
Expand Down
1 change: 1 addition & 0 deletions parts/linux/cloud-init/artifacts/cse_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ ERR_LOOKUP_ENABLE_MANAGED_GPU_EXPERIENCE_TAG=230 # Error checking nodepool tags

ERR_PULL_POD_INFRA_CONTAINER_IMAGE=225 # Error pulling pause image
ERR_ORAS_PULL_SYSEXT_FAIL=231 # Error pulling systemd system extension artifact via oras from registry
ERR_SYSEXT_VERSION_ID_NOT_FOUND=232 # VERSION_ID not found in /etc/os-release, required for sysext tag resolution

# ----------------------- AKS Node Controller----------------------------------
ERR_AKS_NODE_CONTROLLER_ERROR=240 # Generic error in AKS Node Controller
Expand Down
61 changes: 61 additions & 0 deletions parts/linux/cloud-init/artifacts/cse_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -851,4 +851,65 @@ datasource:
EOF
}

# ==== GPU driver functions ====
# Shared between Azure Linux (Mariner) and ACL distro install scripts.
# These functions are only invoked on GPU-enabled VM SKUs during provisioning;
# they are safe to define on all distros (no execution at source time).

should_use_nvidia_open_drivers() {
    # Decides which NVIDIA driver flavor the current VM SKU needs.
    # Legacy GPUs (T4, V100) require the proprietary driver; A100 and newer
    # use the NVIDIA open-kernel drivers.
    # Returns: 0 -> open drivers, 1 -> proprietary drivers, 2 -> SKU lookup failed.
    local sku
    sku=$(get_compute_sku)
    if [ -z "$sku" ]; then
        echo "Error: Unable to determine VM SKU, cannot select GPU driver" >&2
        return 2
    fi

    # Proprietary-driver families:
    #   T4:   NC*_T4_v3
    #   V100: NDv2 (nd40rs_v2), NDv3 (nd40s_v3), NCsv3 (standard_nc*s_v3)
    case "${sku,,}" in
        *t4_v3*|*nd40rs_v2*|*nd40s_v3*|standard_nc*s_v3*)
            return 1
            ;;
        *)
            # Everything else (A100+) gets the open drivers.
            return 0
            ;;
    esac
}

enableNvidiaPersistenceMode() {
PERSISTENCED_SERVICE_FILE_PATH="/etc/systemd/system/nvidia-persistenced.service"
touch ${PERSISTENCED_SERVICE_FILE_PATH}
cat << EOF > ${PERSISTENCED_SERVICE_FILE_PATH}
Comment on lines +893 to +895
Copy link

Copilot AI Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PERSISTENCED_SERVICE_FILE_PATH is written as a global variable and is used unquoted in touch/redirection. Making it local and quoting expansions improves safety and avoids accidental global state leakage when cse_install.sh is sourced by multiple distro scripts.

Suggested change
PERSISTENCED_SERVICE_FILE_PATH="/etc/systemd/system/nvidia-persistenced.service"
touch ${PERSISTENCED_SERVICE_FILE_PATH}
cat << EOF > ${PERSISTENCED_SERVICE_FILE_PATH}
local PERSISTENCED_SERVICE_FILE_PATH="/etc/systemd/system/nvidia-persistenced.service"
touch "${PERSISTENCED_SERVICE_FILE_PATH}"
cat << EOF > "${PERSISTENCED_SERVICE_FILE_PATH}"

Copilot uses AI. Check for mistakes.
[Unit]
Description=NVIDIA Persistence Daemon
Wants=syslog.target

[Service]
Type=forking
ExecStart=/usr/bin/nvidia-persistenced --verbose
ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced
Restart=always
TimeoutSec=300

[Install]
WantedBy=multi-user.target
EOF

systemctl enable nvidia-persistenced.service || exit 1
systemctl restart nvidia-persistenced.service || exit 1
Comment on lines +911 to +912
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

enableNvidiaPersistenceMode calls systemctl enable/restart and exit 1 on failure. Since this function is now shared and used for ACL, exiting with a generic code loses the repo’s standardized error codes and skips the retry/timeout wrappers (systemctlEnableAndStart, systemctl_*). Consider using the helper wrappers and returning a specific error code (e.g. ERR_SYSTEMCTL_START_FAIL) so failures are actionable in CSE telemetry.

Suggested change
systemctl enable nvidia-persistenced.service || exit 1
systemctl restart nvidia-persistenced.service || exit 1
if ! systemctlEnableAndStart nvidia-persistenced.service; then
return $ERR_SYSTEMCTL_START_FAIL
fi

Copilot uses AI. Check for mistakes.
}

#EOF
2 changes: 2 additions & 0 deletions parts/linux/cloud-init/artifacts/cse_main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,8 @@ function nodePrep {
# while it fails to install on NC24.
if isMarinerOrAzureLinux "$OS"; then
logs_to_events "AKS.CSE.installNvidiaFabricManager" installNvidiaFabricManager
elif isACL "$OS" "$OS_VARIANT"; then
logs_to_events "AKS.CSE.installNvidiaFabricManagerSysext" installNvidiaFabricManagerSysext
fi
# Start fabric manager service
logs_to_events "AKS.CSE.nvidia-fabricmanager" "systemctlEnableAndStart nvidia-fabricmanager 30" || exit $ERR_GPU_DRIVERS_START_FAIL
Expand Down
Loading
Loading