diff --git a/DOCKER.md b/DOCKER.md index 3e56d2b..bf43ad4 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -1,10 +1,15 @@ -# Docker Guide +# Docker / Singularity Guide -The Docker image provides a fully headless rendering environment. By default -it uses [OSMesa](https://docs.mesa3d.org/osmesa.html) (Mesa's CPU software -renderer) — no display server, `xvfb`, or GPU required. If a GPU render -device is available, pass `--device /dev/dri/renderD128` to enable EGL GPU -rendering instead; `libegl1` is already included in the image. +The Docker image provides a fully headless rendering environment using +**EGL** — no display server or `xvfb` required. + +- **CPU rendering (default):** EGL falls back to Mesa's llvmpipe software + renderer automatically. No GPU or special flags needed. +- **GPU rendering (NVIDIA):** + - For **Docker**, pass `--gpus all` and EGL selects the + GPU via the NVIDIA Container Toolkit. + - For **Singularity/Apptainer**, pass `--nv` (NVIDIA) to enable GPU + rendering via EGL automatically. The default entry point is `whippersnap4` (four-view batch rendering). `whippersnap1` (single-view snapshot and rotation video) can be invoked by @@ -42,6 +47,26 @@ docker run --rm --init \ -o /output/snap4.png ``` +### With NVIDIA GPU (faster rendering) + +Pass `--gpus all` to let EGL use the GPU via the NVIDIA Container Toolkit: + +```bash +docker run --rm --init \ + --gpus all \ + -v /path/to/subject:/subject \ + -v /path/to/output:/output \ + --user $(id -u):$(id -g) \ + whippersnappy \ + -lh /subject/surf/lh.thickness \ + -rh /subject/surf/rh.thickness \ + -sd /subject \ + -o /output/snap4.png +``` + +> **Note:** Requires the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) +> installed on the host (`nvidia-ctk --version` to verify). + ### With an annotation file instead of an overlay ```bash @@ -181,6 +206,37 @@ parent directory to retrieve them on the host. 
--- +## Singularity / Apptainer + +The same image can be used with Singularity or Apptainer. + +**CPU rendering** (default — no GPU needed): +```bash +singularity exec \ + -B /path/to/subject:/subject \ + -B /path/to/output:/output \ + whippersnappy.sif \ + whippersnap4 \ + -lh /subject/surf/lh.thickness \ + -rh /subject/surf/rh.thickness \ + -sd /subject -o /output/snap4.png +``` + +**GPU rendering** — pass `--nv` (NVIDIA) or `--rocm` (AMD); EGL selects +the GPU automatically: +```bash +singularity exec --nv \ + -B /path/to/subject:/subject \ + -B /path/to/output:/output \ + whippersnappy.sif \ + whippersnap4 \ + -lh /subject/surf/lh.thickness \ + -rh /subject/surf/rh.thickness \ + -sd /subject -o /output/snap4.png +``` + +--- + ## Notes - The `--init` flag is recommended so that signals (e.g. `Ctrl-C`) are handled @@ -189,18 +245,22 @@ parent directory to retrieve them on the host. not root. - The interactive GUI (`whippersnap`) is **not** available in the Docker image — it requires a display server and PyQt6, which are not installed. -- **Default rendering** uses **OSMesa** (Mesa's CPU software renderer, provided - by the `libosmesa6` system package). No GPU or `/dev/dri/` device needed. -- **GPU rendering via EGL** works out of the box — `libegl1` is included in the - image. Pass the render device into the container and WhipperSnapPy will - automatically prefer EGL over OSMesa when `/dev/dri/renderD*` is accessible: - ```bash - docker run --rm --init \ - --device /dev/dri/renderD128 \ - -v /path/to/subject:/subject \ - -v /path/to/output:/output \ - whippersnappy \ - -lh /subject/surf/lh.thickness -rh /subject/surf/rh.thickness \ - -sd /subject -o /output/snap4.png +- **Docker CPU rendering** (default — no GPU needed): EGL uses Mesa's llvmpipe + software renderer. The log will show: + ``` + EGL context active — CPU software rendering (llvmpipe (...), ...) 
+ ``` +- **Docker GPU rendering** (`--gpus all`, NVIDIA only): EGL uses the NVIDIA GPU + driver injected by the NVIDIA Container Toolkit. The log will show: + ``` + EGL context active — GPU rendering (...) + ``` +- **Singularity GPU rendering** with `--nv` uses EGL with the NVIDIA GPU + driver injected by Singularity. The log will show: ``` + EGL context active — GPU rendering (...) + ``` + + + diff --git a/Dockerfile b/Dockerfile index c450d0e..72988ff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,29 @@ FROM python:3.11-slim -# libosmesa6 — OSMesa CPU software renderer (default headless path, no GPU needed) -# libegl1 — EGL dispatch library; enables GPU rendering when /dev/dri/renderD* -# is passed via --device (e.g. docker run --device /dev/dri/renderD128) -# libgl1 — base OpenGL dispatch library required by PyOpenGL -# libglib2.0-0, libfontconfig1, libdbus-1-3 — runtime deps for Pillow / font rendering +# Suppress Mesa's shader-cache warning ("Failed to create //.cache …") that +# appears when running as a non-standard user inside Docker where $HOME is +# unset or points to a non-writable directory. +ENV MESA_SHADER_CACHE_DISABLE=1 + +# Expose all NVIDIA GPUs and driver capabilities when run with --gpus all +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=all + +# libegl1 — GLVND EGL dispatch library (routes to GPU or Mesa llvmpipe) +# libgl1 — base OpenGL dispatch library required by PyOpenGL +# libfontconfig1 — runtime dependency for Pillow / font rendering RUN apt-get update && apt-get install -y --no-install-recommends \ - libosmesa6 \ libegl1 \ libgl1 \ - libglib2.0-0 \ - libfontconfig1 \ - libdbus-1-3 && \ + libfontconfig1 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Register the NVIDIA EGL ICD so libEGL finds the GPU driver +RUN mkdir -p /usr/share/glvnd/egl_vendor.d && \ + echo '{"file_format_version":"1.0.0","ICD":{"library_path":"libEGL_nvidia.so.0"}}' \ + > /usr/share/glvnd/egl_vendor.d/10_nvidia.json + RUN pip install --upgrade pip COPY . 
/WhipperSnapPy diff --git a/README.md b/README.md index 994da30..1eb321e 100644 --- a/README.md +++ b/README.md @@ -33,20 +33,23 @@ For interactive 3D in Jupyter notebooks: pip install 'whippersnappy[notebook]' ``` -Off-screen (headless) rendering on **Linux** uses a three-path fallback: -1. **GLFW invisible window** — used when a display is available (`DISPLAY` set). -2. **EGL** (GPU, no display needed) — used when no display is detected and a - GPU render device (`/dev/dri/renderD*`) is accessible with `libEGL` installed - (`libegl1` on Debian/Ubuntu). This is the recommended path for SSH servers - with a GPU — no `DISPLAY`, `xvfb`, or OSMesa required. -3. **OSMesa** (CPU software renderer) — final fallback; requires - `sudo apt-get install libosmesa6` (Debian/Ubuntu) or - `sudo dnf install mesa-libOSMesa` (RHEL/Fedora). +Off-screen (headless) rendering on **Linux** uses **EGL** with Mesa's llvmpipe +CPU software renderer — no GPU or display server required. The log reports: +``` +EGL context active — CPU software rendering (llvmpipe (...), ...) +``` +When a GPU is accessible (native install, Docker with `--gpus all`, or +Singularity with `--nv`), EGL selects it automatically: +``` +EGL context active — GPU rendering (...) +``` +OSMesa (`libosmesa6`) is a last-resort CPU fallback used only when EGL +itself cannot initialise (e.g. `libegl1` not installed). On **Windows**, GLFW creates an invisible window; a GPU driver is sufficient. On **macOS**, a real display connection is required (NSGL does not support headless rendering). -See the Docker guide for headless Linux usage. +See the Docker/Singularity guide for container usage. ## Command-Line Usage @@ -217,8 +220,8 @@ See `tutorials/whippersnappy_tutorial.ipynb` for complete notebook examples. ## Docker -The Docker image provides a fully headless rendering environment using -OSMesa (CPU software renderer) — no display server, `xvfb`, or GPU required. 
+The Docker image provides a fully headless rendering environment using EGL — +CPU software rendering by default, GPU rendering with `--gpus all` (NVIDIA). See DOCKER.md for details. ## API Documentation diff --git a/whippersnappy/cli/whippersnap.py b/whippersnappy/cli/whippersnap.py index 1a5c074..fa628db 100644 --- a/whippersnappy/cli/whippersnap.py +++ b/whippersnappy/cli/whippersnap.py @@ -720,22 +720,21 @@ def run(): # ------------------------------------------------------------------ if QApplication is None: print( - "ERROR: Interactive mode requires PyQt6. " + "Error: Interactive mode requires PyQt6. " "Install with: pip install 'whippersnappy[gui]'", file=sys.stderr, ) - raise RuntimeError( - "Interactive mode requires PyQt6. " - "Install with: pip install 'whippersnappy[gui]'" - ) + sys.exit(1) try: from ..gui import ConfigWindow # noqa: PLC0415 except ModuleNotFoundError as e: - raise RuntimeError( - "Interactive mode requires PyQt6. " - "Install with: pip install 'whippersnappy[gui]'" - ) from e + print( + f"Error: Interactive mode requires PyQt6 ({e}). " + "Install with: pip install 'whippersnappy[gui]'", + file=sys.stderr, + ) + sys.exit(1) current_fthresh_ = args.fthresh current_fmax_ = args.fmax @@ -756,18 +755,22 @@ def run(): # show_window creates the GLFW window, sets up a QTimer render loop, # then calls app.exec() — returns when either window is closed. 
- show_window( - mesh=mesh_path, - overlay=overlay, - annot=args.annot, - bg_map=bg_map, - roi=roi, - invert=args.invert, - specular=args.specular, - view=view, - app=app, - config_window=config_window, - ) + try: + show_window( + mesh=mesh_path, + overlay=overlay, + annot=args.annot, + bg_map=bg_map, + roi=roi, + invert=args.invert, + specular=args.specular, + view=view, + app=app, + config_window=config_window, + ) + except (RuntimeError, FileNotFoundError) as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) if __name__ == "__main__": diff --git a/whippersnappy/cli/whippersnap1.py b/whippersnappy/cli/whippersnap1.py index 2fe5625..cf96cb9 100644 --- a/whippersnappy/cli/whippersnap1.py +++ b/whippersnappy/cli/whippersnap1.py @@ -43,12 +43,12 @@ import argparse import logging import os +import sys import tempfile import numpy as np if __name__ == "__main__" and __package__ is None: - import sys os.execv(sys.executable, [sys.executable, "-m", "whippersnappy.cli.whippersnap1"] + sys.argv[1:]) from .. import snap1, snap_rotate @@ -309,8 +309,11 @@ def run(): ambient=args.ambient, ) log.info("Snapshot saved to %s (%dx%d)", outpath, img.width, img.height) - except (RuntimeError, FileNotFoundError, ValueError, ImportError) as e: + except ValueError as e: parser.error(str(e)) + except (RuntimeError, FileNotFoundError, ImportError) as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) if __name__ == "__main__": diff --git a/whippersnappy/cli/whippersnap4.py b/whippersnappy/cli/whippersnap4.py index a91fa80..ca37b00 100644 --- a/whippersnappy/cli/whippersnap4.py +++ b/whippersnappy/cli/whippersnap4.py @@ -16,12 +16,12 @@ import argparse import logging import os +import sys import tempfile import numpy as np if __name__ == "__main__" and __package__ is None: - import sys os.execv(sys.executable, [sys.executable, "-m", "whippersnappy.cli.whippersnap4"] + sys.argv[1:]) from .. 
import snap4 @@ -210,8 +210,11 @@ def run(): logger.info( "Snapshot saved to %s (%dx%d)", args.output_path, img.width, img.height ) - except (RuntimeError, FileNotFoundError, ValueError) as e: + except ValueError as e: parser.error(str(e)) + except (RuntimeError, FileNotFoundError) as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) if __name__ == "__main__": diff --git a/whippersnappy/gl/_headless.py b/whippersnappy/gl/_headless.py index 3a8214a..b3f2b49 100644 --- a/whippersnappy/gl/_headless.py +++ b/whippersnappy/gl/_headless.py @@ -1,30 +1,32 @@ """Headless OpenGL platform detection. This module MUST be imported before any ``import OpenGL.GL`` statement in the -package. On Linux with no display server it sets ``PYOPENGL_PLATFORM`` so -that PyOpenGL resolves function pointers via the correct backend before +package. On Linux with no display it sets ``PYOPENGL_PLATFORM`` so that +PyOpenGL resolves function pointers via the correct backend before ``OpenGL.GL`` is first imported. -Priority chain when no display is detected (Linux only): +Priority chain on Linux when no display is detected +(``DISPLAY`` / ``WAYLAND_DISPLAY`` unset): -1. **EGL + GPU device** — ``/dev/dri/renderD*`` readable and ``libEGL`` - loadable. Sets ``PYOPENGL_PLATFORM=egl`` immediately so that PyOpenGL - binds function pointers via EGL when ``OpenGL.GL`` is first imported. -2. **OSMesa** — CPU software renderer. Sets ``PYOPENGL_PLATFORM=osmesa``. +1. **EGL** — tried first. A lightweight ctypes probe confirms EGL can + actually initialise a display before ``PYOPENGL_PLATFORM=egl`` is set. + When a GPU is accessible (native install) EGL uses it; otherwise EGL falls + back to Mesa's llvmpipe CPU software renderer. Works in Docker and + Singularity without any special flags. +2. **OSMesa** — fallback when EGL cannot initialise at all (e.g. ``libegl1`` + not installed). Sets ``PYOPENGL_PLATFORM=osmesa``. 3. **Neither** — raises ``RuntimeError`` with install instructions. 
-When ``DISPLAY`` is set (e.g. normal desktop or ``ssh -Y``), ``_headless`` -does not intervene: GLFW is tried first in :func:`init_offscreen_context`. -If GLFW fails (e.g. GLX 3.3 unavailable on the forwarded display), EGL is -attempted only when ``PYOPENGL_PLATFORM`` was already set to ``"egl"`` by -this module at import time — i.e. only for the no-display + EGL-device case. -In all other GLFW-failure scenarios, OSMesa is used as the final fallback. +When ``DISPLAY`` is set the module does not intervene; GLFW is tried first +in :func:`~whippersnappy.gl.context.init_offscreen_context`. + +``PYOPENGL_PLATFORM`` is not consulted by GLFW, so setting it here does not +affect the interactive GUI (``whippersnap``). No OpenGL, GLFW, or other heavy imports are done here — only stdlib. """ import ctypes -import glob import logging import os import sys @@ -32,9 +34,9 @@ logger = logging.getLogger(__name__) -def _osmesa_is_available(): - """Return True if libOSMesa can be loaded via ctypes.""" - for name in ("libOSMesa.so.8", "libOSMesa.so", "OSMesa"): +def _egl_is_available(): + """Return True if libEGL can be loaded via ctypes.""" + for name in ("libEGL.so.1", "libEGL.so"): try: ctypes.CDLL(name) return True @@ -43,76 +45,190 @@ def _osmesa_is_available(): return False -def egl_device_is_available(): - """Return True if libEGL is loadable AND a DRI render node is accessible. +def _egl_context_works(): + """Probe EGL via ctypes to confirm a context can actually be created headlessly. - Checking for ``/dev/dri/renderD*`` existence and readability guards - against Singularity/Docker containers that have EGL libraries installed - but no device nodes bound in — in those cases EGL context creation would - fail and we should fall back to OSMesa instead. 
+ Tries display-independent EGL paths in order: - This function is called both here (at import time) and from - :func:`~whippersnappy.gl.context.init_offscreen_context` (at context - creation time, to decide whether to attempt EGL after GLFW fails). + 1. ``EGL_EXT_device_enumeration`` — enumerate GPU/software devices. + 2. ``EGL_MESA_platform_surfaceless`` — Mesa CPU software rendering + (llvmpipe); no GPU or display server needed. + 3. ``eglGetDisplay(EGL_DEFAULT_DISPLAY)`` — last resort; only succeeds + when a display server (X11/Wayland) is reachable. + + No ``OpenGL.GL`` import and no ``PYOPENGL_PLATFORM`` change are made. + Returns ``True`` only when EGL can actually initialise a display. """ - render_nodes = glob.glob("/dev/dri/renderD*") - if not render_nodes: - logger.debug("EGL: no /dev/dri/renderD* device nodes found — skipping EGL.") + for lib_name in ("libEGL.so.1", "libEGL.so"): + try: + libegl = ctypes.CDLL(lib_name) + break + except OSError: + continue + else: + logger.debug("EGL probe: libEGL not loadable.") + return False + + from contextlib import contextmanager + + @contextmanager + def _suppress_stderr(): + """Suppress C-level stderr (e.g. 
Mesa/EGL driver warnings).""" + devnull = os.open(os.devnull, os.O_WRONLY) + saved = os.dup(2) + try: + os.dup2(devnull, 2) + yield + finally: + os.dup2(saved, 2) + os.close(saved) + os.close(devnull) + + try: + libegl.eglGetProcAddress.restype = ctypes.c_void_p + libegl.eglGetProcAddress.argtypes = [ctypes.c_char_p] + libegl.eglQueryString.restype = ctypes.c_char_p + libegl.eglQueryString.argtypes = [ctypes.c_void_p, ctypes.c_int] + libegl.eglGetDisplay.restype = ctypes.c_void_p + libegl.eglGetDisplay.argtypes = [ctypes.c_void_p] + libegl.eglInitialize.restype = ctypes.c_bool + libegl.eglInitialize.argtypes = [ + ctypes.c_void_p, + ctypes.POINTER(ctypes.c_int), + ctypes.POINTER(ctypes.c_int), + ] + libegl.eglTerminate.restype = ctypes.c_bool + libegl.eglTerminate.argtypes = [ctypes.c_void_p] + + _EGL_EXTENSIONS = 0x3055 + _EGL_NONE = 0x3038 + _EGL_PLATFORM_DEVICE = 0x313F + _EGL_PLATFORM_SURFACELESS = 0x31DD + + client_exts = libegl.eglQueryString(None, _EGL_EXTENSIONS) or b"" + logger.debug("EGL client extensions: %s", client_exts.decode()) + + no_attribs = (ctypes.c_int * 1)(_EGL_NONE) + + def _get_proc(signature, name): + """Resolve an EGL extension function; return None if unavailable.""" + addr = libegl.eglGetProcAddress(name) + return signature(addr) if addr else None + + def _try_init(dpy): + """Try eglInitialize on dpy with stderr suppressed; terminate on success.""" + if not dpy: + return False + major, minor = ctypes.c_int(0), ctypes.c_int(0) + with _suppress_stderr(): + ok = libegl.eglInitialize(dpy, ctypes.byref(major), ctypes.byref(minor)) + libegl.eglTerminate(dpy) + if ok: + logger.debug("EGL probe: eglInitialize OK (EGL %d.%d).", + major.value, minor.value) + return bool(ok) + + # Resolve eglGetPlatformDisplayEXT once; used by paths 1 and 2. 
+ _GetPlatformDisplayEXT = None + if b"EGL_EXT_platform_base" in client_exts: + _GetPlatformDisplayEXT = _get_proc( + ctypes.CFUNCTYPE(ctypes.c_void_p, + ctypes.c_int, ctypes.c_void_p, + ctypes.POINTER(ctypes.c_int)), + b"eglGetPlatformDisplayEXT", + ) + + # --- Path 1: EGL_EXT_device_enumeration --- + # Try GPU and software devices; prefer GPU when available natively. + if _GetPlatformDisplayEXT and b"EGL_EXT_device_enumeration" in client_exts: + _QueryDevices = _get_proc( + ctypes.CFUNCTYPE(ctypes.c_bool, + ctypes.c_int, ctypes.c_void_p, + ctypes.POINTER(ctypes.c_int)), + b"eglQueryDevicesEXT", + ) + if _QueryDevices: + n = ctypes.c_int(0) + with _suppress_stderr(): + found = _QueryDevices(0, None, ctypes.byref(n)) + logger.debug("EGL probe: %d EGL device(s) found.", n.value if found else 0) + if found and n.value > 0: + devices = (ctypes.c_void_p * n.value)() + with _suppress_stderr(): + _QueryDevices(n.value, devices, ctypes.byref(n)) + for dev in devices: + dpy = _GetPlatformDisplayEXT( + _EGL_PLATFORM_DEVICE, + ctypes.c_void_p(dev), + no_attribs, + ) + if _try_init(dpy): + logger.debug("EGL probe: device enumeration succeeded.") + return True + + # --- Path 2: EGL_MESA_platform_surfaceless --- + # CPU software rendering (llvmpipe) — no GPU needed. + if _GetPlatformDisplayEXT and b"EGL_MESA_platform_surfaceless" in client_exts: + dpy = _GetPlatformDisplayEXT( + _EGL_PLATFORM_SURFACELESS, ctypes.c_void_p(0), no_attribs + ) + if _try_init(dpy): + logger.debug("EGL probe: surfaceless platform succeeded (CPU/llvmpipe).") + return True + + # --- Path 3: EGL_DEFAULT_DISPLAY --- + # Works only when a display server is reachable (DISPLAY set). 
+ if _try_init(libegl.eglGetDisplay(ctypes.c_void_p(0))): + logger.debug("EGL probe: EGL_DEFAULT_DISPLAY succeeded.") + return True + + logger.info("EGL probe: no EGL display could be initialised.") return False - if not any(os.access(n, os.R_OK) for n in render_nodes): - logger.debug("EGL: /dev/dri/renderD* exists but not readable — skipping EGL.") + + except Exception as exc: # noqa: BLE001 + logger.debug("EGL probe: unexpected error (%s).", exc) return False - for name in ("libEGL.so.1", "libEGL.so"): + + +def _osmesa_is_available(): + """Return True if libOSMesa can be loaded via ctypes.""" + for name in ("libOSMesa.so.8", "libOSMesa.so", "OSMesa"): try: ctypes.CDLL(name) - logger.debug("EGL: libEGL found and render node accessible.") return True except OSError: continue - logger.debug("EGL: libEGL not found.") return False -if ( - sys.platform == "linux" - and "PYOPENGL_PLATFORM" not in os.environ - and not os.environ.get("DISPLAY") - and not os.environ.get("WAYLAND_DISPLAY") -): - if egl_device_is_available(): - # Set PYOPENGL_PLATFORM=egl NOW, before OpenGL.GL is imported anywhere. - # PyOpenGL selects its platform backend on first import and cannot be - # changed afterwards; deferring to egl_context.py would mean OpenGL.GL - # is already bound to the wrong backend by the time EGL is tried. - os.environ["PYOPENGL_PLATFORM"] = "egl" - logger.debug( - "No display, EGL + GPU device available — PYOPENGL_PLATFORM=egl set." - ) - elif _osmesa_is_available(): - os.environ["PYOPENGL_PLATFORM"] = "osmesa" - logger.debug( - "No display, no EGL device — PYOPENGL_PLATFORM=osmesa set (CPU rendering)." - ) - else: - raise RuntimeError( - "whippersnappy requires an OpenGL context but none could be found.\n" - "\n" - "No display server detected (DISPLAY / WAYLAND_DISPLAY are unset),\n" - "no accessible GPU render device (/dev/dri/renderD*), and OSMesa\n" - "is not installed.\n" - "\n" - "To fix this, choose one of:\n" - " 1. 
Install OSMesa (recommended for headless/SSH use):\n" - " Debian/Ubuntu: sudo apt-get install libosmesa6\n" - " RHEL/Fedora: sudo dnf install mesa-libOSMesa\n" - " 2. Use EGL GPU rendering by ensuring /dev/dri/renderD* is accessible\n" - " and libEGL is installed (libegl1 on Debian/Ubuntu).\n" - " 3. Set DISPLAY if a local X server is running:\n" - " export DISPLAY=:1\n" - ) -elif sys.platform == "linux": - _display = os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY") - logger.debug( - "Display set (%s) — will try GLFW first.", - _display, +if sys.platform == "linux" and "PYOPENGL_PLATFORM" not in os.environ: + _has_display = ( + bool(os.environ.get("DISPLAY")) or bool(os.environ.get("WAYLAND_DISPLAY")) ) + if not _has_display: + # No display — choose headless backend before OpenGL.GL is imported. + # OpenGL.GL binds its function pointers on first import and cannot be + # re-bound, so PYOPENGL_PLATFORM must be set correctly here. + if _egl_context_works(): + os.environ["PYOPENGL_PLATFORM"] = "egl" + logger.info("No display detected; EGL available — using EGL headless rendering.") + elif _osmesa_is_available(): + os.environ["PYOPENGL_PLATFORM"] = "osmesa" + logger.info("No display detected; EGL unavailable — using OSMesa CPU rendering.") + else: + raise RuntimeError( + "whippersnappy requires an OpenGL context but none could be found.\n" + "\n" + "No display server detected (DISPLAY/WAYLAND_DISPLAY unset),\n" + "EGL initialisation failed, and OSMesa is not installed.\n" + "\n" + "To fix this, choose one of:\n" + " 1. Install EGL (recommended, for GPU or CPU rendering):\n" + " Debian/Ubuntu: sudo apt-get install libegl1\n" + " RHEL/Fedora: sudo dnf install mesa-libEGL\n" + " 2. Install OSMesa (CPU-only alternative):\n" + " Debian/Ubuntu: sudo apt-get install libosmesa6\n" + " RHEL/Fedora: sudo dnf install mesa-libOSMesa\n" + " 3. 
Set DISPLAY if a local X server is running:\n" + " export DISPLAY=:0\n" + ) diff --git a/whippersnappy/gl/context.py b/whippersnappy/gl/context.py index 60ab95c..47039e1 100644 --- a/whippersnappy/gl/context.py +++ b/whippersnappy/gl/context.py @@ -6,18 +6,18 @@ Context creation tries up to three paths (Linux; macOS/Windows use GLFW only): 1. **GLFW invisible window** — standard path when a display is available. -2. **EGL pbuffer** — headless GPU rendering (Linux, no display needed). - Only used when :mod:`~whippersnappy.gl._headless` set - ``PYOPENGL_PLATFORM=egl`` at import time (no display + accessible - ``/dev/dri/renderD*``). PyOpenGL selects its platform backend on the - first ``import OpenGL.GL`` and cannot be changed afterwards — so EGL is - only safe when it was selected before any ``OpenGL.GL`` import. +2. **EGL pbuffer** — headless rendering (Linux, no display needed). + Used when :mod:`~whippersnappy.gl._headless` set + ``PYOPENGL_PLATFORM=egl`` at import time. ``EGLContext`` handles GPU + and CPU (llvmpipe) fallback internally. 3. **OSMesa** — CPU software renderer (Linux only). - Used when neither GLFW nor EGL succeeds. + Used only when ``PYOPENGL_PLATFORM=osmesa`` was set at import time + (i.e. EGL probe failed — ``libEGL`` not installed). -The :mod:`whippersnappy.gl._headless` guard runs before ``OpenGL.GL`` is -imported and sets ``PYOPENGL_PLATFORM`` to ``"egl"`` or ``"osmesa"`` -as appropriate. +Each backend is selected before ``OpenGL.GL`` is first imported. +PyOpenGL binds its function pointers on first import and cannot be re-bound, +so mixing backends causes silent GL failures. The guard in each branch +ensures only the preselected backend is used. """ # ruff: noqa: I001 — import order is intentional: _headless must precede OpenGL.GL @@ -142,16 +142,20 @@ def init_offscreen_context(width, height): Tries up to three paths on Linux; macOS and Windows use GLFW only. - 1. **GLFW invisible window** — standard path when a display is available. - 2. 
**EGL pbuffer** — headless GPU rendering (Linux only, no display needed). - Only attempted when :mod:`~whippersnappy.gl._headless` already set - ``PYOPENGL_PLATFORM=egl`` at import time (i.e. no display detected AND - ``/dev/dri/renderD*`` is accessible). This guarantees ``OpenGL.GL`` - was bound to the EGL backend before any GL call; attempting EGL after - ``OpenGL.GL`` has already been imported with a different backend would - silently break function resolution. - 3. **OSMesa** — CPU software renderer (Linux only). Used when neither - GLFW nor EGL succeeds, or when ``PYOPENGL_PLATFORM=osmesa`` was set. + 1. **GLFW invisible window** — used when ``PYOPENGL_PLATFORM`` is not + ``"egl"`` (i.e. a display is available and EGL was not preselected). + Skipped on Linux when EGL was selected at import time. + 2. **EGL** — used when ``PYOPENGL_PLATFORM=egl`` was set by + :mod:`~whippersnappy.gl._headless` at import time. ``EGLContext`` + tries GPU device → surfaceless (llvmpipe) → default display in order, + so it handles CPU fallback internally within EGL. + 3. **OSMesa** — used only when ``PYOPENGL_PLATFORM=osmesa`` was set at + import time (EGL probe failed entirely — ``libEGL`` not installed). + + Each backend is only used when it was preselected before ``OpenGL.GL`` + was imported. PyOpenGL binds its function pointers on first import and + cannot be re-bound — mixing backends (e.g. GLX-bound pointers with an + OSMesa context) causes silent GL failures. Parameters ---------- @@ -174,9 +178,12 @@ def init_offscreen_context(width, height): global _offscreen_context # --- Step 1: GLFW invisible window --- - window = init_window(width, height, visible=False) - if window: - return window + # Skip when PYOPENGL_PLATFORM=egl — OpenGL.GL is already bound to EGL, + # so a GLFW/GLX attempt would print GLX warnings and fail anyway. 
+ if os.environ.get("PYOPENGL_PLATFORM") != "egl": + window = init_window(width, height, visible=False) + if window: + return window # Steps 2 & 3 are Linux-only. if sys.platform != "linux": @@ -187,41 +194,54 @@ def init_offscreen_context(width, height): "On Windows ensure a GPU driver or Mesa opengl32.dll is available." ) - # --- Step 2: EGL headless GPU rendering --- - # Only safe when PYOPENGL_PLATFORM=egl was set by _headless.py before - # OpenGL.GL was imported — meaning the process has no display AND an EGL - # device was found at import time. PyOpenGL binds its platform backend on - # first import and cannot be switched afterwards; importing egl_context.py - # here when PYOPENGL_PLATFORM is already something else (e.g. "osmesa" or - # unset/GLX) would cause silent function-pointer mismatches. + # --- Step 2: EGL headless rendering --- + # PYOPENGL_PLATFORM=egl was set by _headless.py before OpenGL.GL was + # imported (no display detected + EGL probe succeeded). PyOpenGL is + # already bound to EGL; GLFW was intentionally skipped above. + # EGLContext._init_egl tries GPU device → surfaceless (llvmpipe) → + # default display in order, so it handles CPU fallback internally. + # We must NOT fall back to OSMesa here: PYOPENGL_PLATFORM is already + # "egl" and OpenGL.GL function pointers are bound to EGL — using an + # OSMesa context with EGL pointers causes silent GL failures. if os.environ.get("PYOPENGL_PLATFORM") == "egl": - logger.info("GLFW failed — trying EGL headless GPU rendering.") + from .egl_context import EGLContext # noqa: PLC0415 + ctx = EGLContext(width, height) + ctx.make_current() + _offscreen_context = ctx + logger.info("Using EGL headless context (no display required).") + return None + + # --- Step 3: OSMesa software rendering --- + # Only reached when PYOPENGL_PLATFORM=osmesa was set at import time + # (i.e. EGL probe failed entirely — libEGL not installed or unusable). 
+ # Guard is required: if OpenGL.GL was bound to GLX (a display was set but + # GLFW failed) and we created an OSMesa context here, GL function pointers + # would be GLX-bound while the context is OSMesa — causing silent failures. + if os.environ.get("PYOPENGL_PLATFORM") == "osmesa": try: - from .egl_context import EGLContext # noqa: PLC0415 - ctx = EGLContext(width, height) + from .osmesa_context import OSMesaContext # noqa: PLC0415 + ctx = OSMesaContext(width, height) ctx.make_current() _offscreen_context = ctx - logger.info("Using EGL headless context (GPU, no display required).") + logger.info("Using OSMesa headless context (CPU, no display or GPU required).") return None except (ImportError, RuntimeError) as exc: - logger.warning("EGL failed (%s) — falling back to OSMesa.", exc) - - # --- Step 3: OSMesa software rendering --- - logger.info("Trying OSMesa software rendering (CPU).") - try: - from .osmesa_context import OSMesaContext # noqa: PLC0415 - ctx = OSMesaContext(width, height) - ctx.make_current() - _offscreen_context = ctx - logger.info("Using OSMesa headless context (CPU, no display or GPU required).") - return None - except (ImportError, RuntimeError) as exc: - raise RuntimeError( - "Could not create any OpenGL context (tried GLFW, EGL, OSMesa). " - f"Last error: {exc}\n" - "Install OSMesa: sudo apt-get install libosmesa6 (Debian/Ubuntu)\n" - " or sudo dnf install mesa-libOSMesa (RHEL/Fedora)" - ) from exc + raise RuntimeError( + "Could not create any OpenGL context (tried GLFW and OSMesa). " + f"Last error: {exc}" + ) from exc + + raise RuntimeError( + "Could not create a GLFW OpenGL context and no headless backend was " + "preselected. This can happen when DISPLAY is set but the display is " + "not usable (e.g. a broken ssh -X forward) and no EGL or OSMesa " + "library was found at import time. 
To fix this, install a headless " + "rendering backend:\n" + " - EGL (recommended): sudo apt-get install libegl1\n" + " - OSMesa (fallback): sudo apt-get install libosmesa6\n" + "With either library installed, WhipperSnapPy will select the headless " + "backend automatically on the next run." + ) def terminate_context(window): diff --git a/whippersnappy/gl/egl_context.py b/whippersnappy/gl/egl_context.py index 46b0d6e..2cfb0c0 100644 --- a/whippersnappy/gl/egl_context.py +++ b/whippersnappy/gl/egl_context.py @@ -22,6 +22,7 @@ ctx.destroy() """ +import contextlib import ctypes import logging import os @@ -67,6 +68,26 @@ _EGL_PLATFORM_DEVICE_EXT = 0x313F +@contextlib.contextmanager +def _silence_stderr(): + """Suppress C-level stderr for the duration of the block. + + Mesa writes DRI/EGL warnings (e.g. "failed to open /dev/dri/renderD128: + Permission denied") directly to file descriptor 2, bypassing Python's + logging system. We redirect fd 2 to ``/dev/null`` for calls that are + expected to fail (e.g. probing GPU devices without access) so users don't + see spurious warnings when the fallback path works fine. + """ + devnull_fd = os.open(os.devnull, os.O_WRONLY) + saved = os.dup(2) + try: + os.dup2(devnull_fd, 2) + yield + finally: + os.dup2(saved, 2) + os.close(saved) + os.close(devnull_fd) + class EGLContext: """A headless OpenGL 3.3 Core context backed by an EGL pbuffer + FBO. 
@@ -175,30 +196,84 @@ def _init_egl(self): client_exts = libegl.eglQueryString(None, _EGL_EXTENSIONS) or b"" logger.debug("EGL client extensions: %s", client_exts.decode()) - has_device_enum = b"EGL_EXT_device_enumeration" in client_exts has_platform_base = b"EGL_EXT_platform_base" in client_exts + has_device_enum = b"EGL_EXT_device_enumeration" in client_exts + has_surfaceless = b"EGL_MESA_platform_surfaceless" in client_exts - display = None - if has_device_enum and has_platform_base: - eglQueryDevicesEXT = self._get_ext_fn( - "eglQueryDevicesEXT", - ctypes.c_bool, - [ctypes.c_int, ctypes.c_void_p, ctypes.POINTER(ctypes.c_int)], - ) + eglGetPlatformDisplayEXT = None + if has_platform_base: eglGetPlatformDisplayEXT = self._get_ext_fn( "eglGetPlatformDisplayEXT", ctypes.c_void_p, [ctypes.c_int, ctypes.c_void_p, ctypes.POINTER(ctypes.c_int)], ) - display = self._open_device_display( - eglQueryDevicesEXT, eglGetPlatformDisplayEXT - ) - if display is None: - logger.debug("Falling back to eglGetDisplay(EGL_DEFAULT_DISPLAY)") - libegl.eglGetDisplay.restype = ctypes.c_void_p - libegl.eglGetDisplay.argtypes = [ctypes.c_void_p] - display = libegl.eglGetDisplay(ctypes.c_void_p(0)) + _EGL_NONE = 0x3038 + no_attribs = (ctypes.c_int * 1)(_EGL_NONE) + + # Build an ordered list of (display_handle, path_label) candidates. + # We try each in order, calling eglInitialize on each; the first that + # succeeds becomes the active display. This means a GPU device that + # fails eglInitialize (e.g. DRI2 screen not available inside Docker) + # is skipped and we fall through to surfaceless (llvmpipe CPU) — + # all within EGL, avoiding the broken EGL→OSMesa mixed-platform issue. + candidates = [] # list of (dpy, label) + + # --- Candidate 1: EGL_EXT_device_enumeration --- + # Hardware GPU devices tried first, software devices last. + # GPU is selected automatically when accessible natively. 
+ if has_device_enum and eglGetPlatformDisplayEXT: + eglQueryDevicesEXT = self._get_ext_fn( + "eglQueryDevicesEXT", + ctypes.c_bool, + [ctypes.c_int, ctypes.c_void_p, ctypes.POINTER(ctypes.c_int)], + ) + for dpy, is_hw in (self._open_device_display( + eglQueryDevicesEXT, eglGetPlatformDisplayEXT) or []): + label = "GPU device" if is_hw else "software device" + candidates.append((dpy, label)) + + # --- Candidate 2: EGL_MESA_platform_surfaceless (CPU/llvmpipe) --- + _EGL_PLATFORM_SURFACELESS = 0x31DD + if eglGetPlatformDisplayEXT and has_surfaceless: + sl_dpy = eglGetPlatformDisplayEXT( + _EGL_PLATFORM_SURFACELESS, ctypes.c_void_p(0), no_attribs + ) + if sl_dpy: + candidates.append((sl_dpy, "surfaceless")) + + # --- Candidate 3: EGL_DEFAULT_DISPLAY (needs X11/Wayland) --- + libegl.eglGetDisplay.restype = ctypes.c_void_p + libegl.eglGetDisplay.argtypes = [ctypes.c_void_p] + def_dpy = libegl.eglGetDisplay(ctypes.c_void_p(0)) + if def_dpy: + candidates.append((def_dpy, "default display")) + + # Try each candidate until one succeeds eglInitialize. + # Suppress C-level stderr during attempts that may produce Mesa DRI + # warnings ("failed to open /dev/dri/...", "failed to create dri2 + # screen") — these are expected when a GPU device is found but not + # accessible (e.g. Singularity without --nv). + display = None + self._display_path = "unknown" + for dpy, label in candidates: + major, minor = ctypes.c_int(0), ctypes.c_int(0) + with _silence_stderr(): + ok = libegl.eglInitialize(dpy, ctypes.byref(major), ctypes.byref(minor)) + if ok: + display = dpy + self._display_path = label + logger.debug("EGL: initialised via %s (EGL %d.%d).", + label, major.value, minor.value) + break + else: + if "GPU" in label: + logger.debug( + "EGL: GPU device found but eglInitialize failed " + "— falling back to CPU software rendering." 
+ ) + else: + logger.debug("EGL: %s failed eglInitialize — trying next.", label) if not display: raise RuntimeError( @@ -207,12 +282,6 @@ def _init_egl(self): ) self._display = display - major, minor = ctypes.c_int(0), ctypes.c_int(0) - if not libegl.eglInitialize( - self._display, ctypes.byref(major), ctypes.byref(minor) - ): - raise RuntimeError("eglInitialize failed.") - logger.debug("EGL %d.%d", major.value, minor.value) if not libegl.eglBindAPI(_EGL_OPENGL_API): raise RuntimeError("eglBindAPI(OpenGL) failed.") @@ -252,27 +321,71 @@ def _init_egl(self): "eglCreateContext for OpenGL 3.3 Core failed. " "Try: MESA_GL_VERSION_OVERRIDE=3.3 MESA_GLSL_VERSION_OVERRIDE=330" ) - logger.info("EGL context created (%dx%d)", self.width, self.height) + logger.debug("EGL context created (%dx%d) via %s display.", + self.width, self.height, self._display_path) def _open_device_display(self, eglQueryDevicesEXT, eglGetPlatformDisplayEXT): - """Enumerate EGL devices and return first usable display pointer.""" + """Enumerate EGL devices and return display candidates ordered GPU-first. + + Returns a list of ``(display_handle, is_hw)`` tuples — hardware GPU + devices first, software devices last. The caller (``_init_egl``) tries + each by calling ``eglInitialize``; the first that succeeds is used. + + Hardware vs software is determined by ``EGL_MESA_device_software`` in + the device extension string — this correctly handles NVIDIA (no DRM + path, but not a software device) and AMD/Intel (has a DRM path). 
+ """ n = ctypes.c_int(0) - if not eglQueryDevicesEXT(0, None, ctypes.byref(n)) or n.value == 0: - logger.warning("eglQueryDevicesEXT: no devices.") + with _silence_stderr(): + found = eglQueryDevicesEXT(0, None, ctypes.byref(n)) + if not found or n.value == 0: + logger.debug("EGL: eglQueryDevicesEXT found no devices.") return None - logger.debug("EGL: %d device(s) found", n.value) + logger.info("EGL: %d device(s) found via enumeration.", n.value) devices = (ctypes.c_void_p * n.value)() - eglQueryDevicesEXT(n.value, devices, ctypes.byref(n)) + with _silence_stderr(): + eglQueryDevicesEXT(n.value, devices, ctypes.byref(n)) no_attribs = (ctypes.c_int * 1)(_EGL_NONE) - for i, dev in enumerate(devices): - dpy = eglGetPlatformDisplayEXT( - _EGL_PLATFORM_DEVICE_EXT, ctypes.c_void_p(dev), no_attribs - ) + + _EGL_DRM_DEVICE_FILE_EXT = 0x3233 # for logging only + _EGL_EXTENSIONS_STR = 0x3055 + try: + addr2 = self._libegl.eglGetProcAddress(b"eglQueryDeviceStringEXT") + _QueryDeviceString = ctypes.CFUNCTYPE( + ctypes.c_char_p, ctypes.c_void_p, ctypes.c_int + )(addr2) if addr2 else None + except Exception: # noqa: BLE001 + _QueryDeviceString = None + + hw_devices = [] + sw_devices = [] + for dev in devices: + is_sw = False + if _QueryDeviceString: + drm_path = _QueryDeviceString(ctypes.c_void_p(dev), _EGL_DRM_DEVICE_FILE_EXT) + dev_exts = _QueryDeviceString(ctypes.c_void_p(dev), _EGL_EXTENSIONS_STR) or b"" + is_sw = b"EGL_MESA_device_software" in dev_exts + logger.debug("EGL device: drm_path=%s sw=%s exts=%s", + drm_path, is_sw, dev_exts.decode() if dev_exts else "") + if is_sw: + sw_devices.append(dev) + else: + hw_devices.append(dev) + + if hw_devices: + logger.debug("EGL: %d hardware device(s), %d software device(s).", + len(hw_devices), len(sw_devices)) + + results = [] + for dev in hw_devices + sw_devices: + with _silence_stderr(): + dpy = eglGetPlatformDisplayEXT( + _EGL_PLATFORM_DEVICE_EXT, ctypes.c_void_p(dev), no_attribs + ) if dpy: - logger.debug("EGL: using device 
%d", i) - return dpy - return None + results.append((dpy, dev in hw_devices)) + return results # list of (display, is_hw) def make_current(self): @@ -291,6 +404,22 @@ def make_current(self): # via at least one GL call; glGetError() is the cheapest trigger. gl.glGetError() + # Report GPU vs CPU rendering based on the GL renderer string. + renderer = (gl.glGetString(gl.GL_RENDERER) or b"").decode("utf-8", errors="replace") + vendor = (gl.glGetString(gl.GL_VENDOR) or b"").decode("utf-8", errors="replace") + _sw = ("llvmpipe", "softpipe", "swrast", "software") + is_cpu = any(s in renderer.lower() for s in _sw) + if is_cpu: + logger.info( + "EGL context active — CPU software rendering (%s, %s).", + renderer, vendor, + ) + else: + logger.info( + "EGL context active — GPU rendering (%s, %s).", + renderer, vendor, + ) + # Build FBO so rendering is directed off-screen self.fbo = gl.glGenFramebuffers(1) gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, self.fbo) diff --git a/whippersnappy/gl/osmesa_context.py b/whippersnappy/gl/osmesa_context.py index 412d97b..5dd9b1c 100644 --- a/whippersnappy/gl/osmesa_context.py +++ b/whippersnappy/gl/osmesa_context.py @@ -25,6 +25,7 @@ import ctypes import ctypes.util import logging +import os import OpenGL.GL as gl from PIL import Image @@ -97,6 +98,12 @@ def __init__(self, width: int, height: int): self._init_osmesa() def _init_osmesa(self): + # Suppress Mesa's "Failed to create //.cache for shader cache" warning + # that appears when $HOME is unset or non-writable (e.g. inside Docker). + # Only set if the user has not already configured it explicitly. 
+ if "MESA_SHADER_CACHE_DISABLE" not in os.environ: + os.environ["MESA_SHADER_CACHE_DISABLE"] = "1" + lib = _load_libosmesa() self._libosmesa = lib diff --git a/whippersnappy/gl/shaders.py b/whippersnappy/gl/shaders.py index 54c2671..28a799a 100644 --- a/whippersnappy/gl/shaders.py +++ b/whippersnappy/gl/shaders.py @@ -23,6 +23,7 @@ def compile_shader_program(vertex_src, fragment_src): return _gl_shaders.compileProgram( _gl_shaders.compileShader(vertex_src, gl.GL_VERTEX_SHADER), _gl_shaders.compileShader(fragment_src, gl.GL_FRAGMENT_SHADER), + validate=False, ) diff --git a/whippersnappy/snap.py b/whippersnappy/snap.py index 9ad2659..88a06d9 100644 --- a/whippersnappy/snap.py +++ b/whippersnappy/snap.py @@ -275,7 +275,6 @@ def snap1( draw_caption(image, caption, font, orientation, x=cx, y=cy) if outpath: - logger.info("Saving snapshot to %s", outpath) image.save(outpath) return image finally: @@ -516,7 +515,6 @@ def snap4( # If outpath is specified, save to disk if outpath: - logger.info("Saving snapshot to %s", outpath) image.save(outpath) return image