diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..386b11b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,31 @@
+# Git
+.git
+.gitignore
+
+# Python
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+venv/
+*.egg-info/
+
+# Node
+node_modules/
+npm-debug.log
+
+# Build output
+build/
+dist/
+
+# Environment files
+.env
+.env.local
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# IDE files
+.vscode/
+.idea/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8a69e31
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+prototype/target
diff --git a/prototype/README.md b/prototype/README.md
new file mode 100644
index 0000000..548d4e6
--- /dev/null
+++ b/prototype/README.md
@@ -0,0 +1,127 @@
+# GDB-UI WebSocket Proof-of-Concept
+
+Prototype for the GSoC 2026 proposal: **GDB-UI — Web-Based GNU Debugger Interface** (C2SI).
+
+## What this proves
+
+The proposal's most technically ambitious claim is that replacing GDB-UI's polling
+architecture with WebSockets requires a **background reader thread** — not just wiring
+up Flask-SocketIO. Here is why:
+
+The GDB MI output that matters here falls into three types:
+
+| Type | Example | When it arrives |
+|------|---------|----------------|
+| `result` | response to `-break-insert` | synchronously, after your command |
+| `notify` | `*stopped` breakpoint hit | **asynchronously — no command triggers it** |
+| `console` | GDB console text (program stdout arrives as pygdbmi `output`) | any time GDB or the inferior prints |
+
+A naive WebSocket implementation that only emits inside the `@socketio.on('gdb_command')`
+handler will **silently drop breakpoint hits**, because those arrive between commands.
+
+This prototype runs a dedicated reader thread per session (`_gdb_reader_thread`) that
+continuously polls `pygdbmi` and emits all record types to the correct SocketIO room.
+The browser receives the breakpoint hit in real time — without having sent any command.
+
+## Architecture
+
+```
+Browser                      server.py                      GDB process
+   |                             |                              |
+   |-- start_debug -----------> |                              |
+   |                             |-- GdbController() ---------->|
+   |                             |-- -file-exec-and-symbols --->|
+   |                             |-- -break-insert 9 ---------->|
+   |                             |   [start reader thread]      |
+   |                             |-- -exec-run ---------------->|
+   |                             |                              |
+   |                             |   reader thread polls:       |
+   |                             |<-- *stopped (notify) --------|
+   |<-- breakpoint_hit ---------|                              |
+   |   (no command sent!)        |                              |
+   |                             |                              |
+   |-- gdb_command: -exec-next->|                              |
+   |                             |-- -exec-next --------------->|
+```
+
+## Files
+
+```
+prototype/
+├── server.py   # Flask-SocketIO backend with background reader thread
+├── target.c    # C program with a deliberate breakpoint location
+├── target      # compiled binary (build with command below)
+└── README.md
+```
+
+## Setup
+
+```bash
+# 1. Install dependencies
+pip install -r requirements.txt
+
+# 2. Compile the C target (requires gcc and gdb)
+gcc -g -O0 -o target target.c
+
+# 3. Run the server
+python server.py
+
+# 4. Open http://localhost:5000
+```
+
+## What to observe in the browser
+
+1. Click **Start debug session**
+2. Watch the **BREAKPOINT HIT** event arrive in the event log — highlighted in red
+3. The label says *"This record arrived unprompted — no command was sent"*
+4. Local variables (`x = 10`, `y = 20`) appear automatically in the Variables panel
+5. Line 9 highlights in the source panel
+6. 
Use **Continue**, **Step over**, or **Step into** to resume execution
+
+## Key code: the reader thread
+
+```python
+def _gdb_reader_thread(session_id, controller):
+    while True:
+        responses = controller.get_gdb_response(timeout_sec=0.1, raise_error_on_timeout=False)
+        for response in responses:
+            msg_type = response.get('type')
+            if msg_type == 'notify' and response.get('message') == 'stopped':
+                # Breakpoint hit — arrived with NO client command
+                socketio.emit('breakpoint_hit', {...}, room=session_id)
+            elif msg_type == 'notify':
+                socketio.emit('gdb_async', {...}, room=session_id)
+            elif msg_type in ('console', 'log'):
+                socketio.emit('program_output', {...}, room=session_id)
+```
+
+## Thread-safe session management
+
+Each browser session gets its own session record, created behind a lock to prevent the
+check-then-create race; its isolated `GdbController` is attached when debugging starts:
+
+```python
+_sessions_lock = threading.Lock()
+
+def get_or_create_session(session_id):
+    with _sessions_lock:  # atomic check-and-create
+        if session_id not in _sessions:
+            _sessions[session_id] = {
+                'controller': None,  # attached in on_start_debug
+                'active': False,
+            }
+        return _sessions[session_id]
+```
+
+Without the lock, two simultaneous requests can both pass the `if not in` check and each
+install its own record; one replaces the other, orphaning any GDB subprocess attached to it.
+
+## Relation to main.py in GDB-UI
+
+The current `main.py` uses:
+```python
+gdb_controller = GdbController()  # global — shared across all requests
+```
+
+This prototype replaces that with per-session isolation and demonstrates the
+WebSocket architecture that Objective 4 of the proposal implements.
diff --git a/prototype/diag.py b/prototype/diag.py
new file mode 100644
index 0000000..d7611c8
--- /dev/null
+++ b/prototype/diag.py
@@ -0,0 +1,40 @@
+"""Smoke test: start GDB through pygdbmi, load the prototype target, set a breakpoint."""
+import os
+import time
+
+from pygdbmi.gdbcontroller import GdbController
+
+# Same platform rule as server.py: the binary is "target.exe" only on Windows.
+_BINARY_NAME = "target.exe" if os.name == "nt" else "target"
+TARGET = os.path.join(os.path.dirname(os.path.abspath(__file__)), _BINARY_NAME)
+# server.py hands GDB a forward-slash path; do the same so both scripts agree.
+TARGET_GDB = TARGET.replace("\\", "/")
+print("Target:", TARGET)
+print("Exists:", os.path.exists(TARGET))
+
+
+def _dump(title, responses):
+    """Print *title*, then one line per pygdbmi record (payload cut to 80 chars)."""
+    print(title)
+    for r in responses:
+        print(" ", r["type"], r.get("message", ""), str(r.get("payload", ""))[:80])
+
+
+ctrl = None
+try:
+    ctrl = GdbController()
+    print("GDB started OK")
+
+    resp = ctrl.write(f"-file-exec-and-symbols {TARGET_GDB}", timeout_sec=5)
+    _dump("file-exec responses:", resp)
+
+    resp2 = ctrl.write("-break-insert main", timeout_sec=5)
+    _dump("break-insert responses:", resp2)
+
+    print("Done")
+except Exception as e:
+    print("ERROR:", type(e).__name__, e)
+finally:
+    # Always reap the GDB subprocess, even if a command raised or timed out.
+    if ctrl is not None:
+        ctrl.exit()
diff --git a/prototype/requirements.txt b/prototype/requirements.txt
new file mode 100644
index 0000000..ce1dea5
--- /dev/null
+++ b/prototype/requirements.txt
@@ -0,0 +1,4 @@
+flask>=3.0.0
+flask-socketio>=5.3.6
+pygdbmi>=0.10.0.0
+eventlet>=0.35.0
diff --git a/prototype/server.py b/prototype/server.py
new file mode 100644
index 0000000..c74e117
--- /dev/null
+++ b/prototype/server.py
@@ -0,0 +1,584 @@
+"""
+GDB-UI WebSocket Proof-of-Concept
+==================================
+Demonstrates the background reader thread architecture proposed for GDB-UI.
+
+Key claim being proven:
+    GDB produces *stopped records asynchronously — not in response to a command.
+    A naive implementation that only emits responses to commands will silently
+    drop breakpoint hits. This prototype runs a dedicated reader thread per
+    session that continuously polls pygdbmi and emits ALL output types,
+    including async notify records, to the correct SocketIO room.
+
+Run:
+    python server.py
+
+Then open http://localhost:5000 in a browser.
+"""
+
+import os
+import time
+import threading
+import json
+
+from flask import Flask, render_template_string, session
+from flask_socketio import SocketIO, emit, join_room
+from pygdbmi.gdbcontroller import GdbController
+
+# ── App setup ────────────────────────────────────────────────────────────────
+
+app = Flask(__name__)
+# The built-in value is a prototype default only; override it via the environment
+# anywhere the signed session cookie (which carries the room id) must not be forgeable.
+app.config["SECRET_KEY"] = os.environ.get("GDB_UI_SECRET_KEY", "gdb-ui-prototype-secret")
+
+# "threading" async mode: the GDB reader runs as a plain OS thread (eventlet is not used).
+# NOTE(review): cors_allowed_origins="*" lets any origin drive this debugger — prototype only.
+socketio = SocketIO(app, async_mode="threading", cors_allowed_origins="*")
+
+_binary_name = "target.exe" if os.name == "nt" else "target"
+TARGET_BINARY = os.path.join(os.path.dirname(os.path.abspath(__file__)), _binary_name)
+TARGET_BINARY_GDB = TARGET_BINARY.replace("\\", "/")
+# NOTE(review): in target.c the statement marked "breakpoint will land here" is line 10;
+# stopping at line 9 halts before `int y = 20;` runs, so y is not yet 20 — confirm intent.
+BREAKPOINT_LINE = 9
+
+# ── Session store ─────────────────────────────────────────────────────────────
+# Maps session_id -> { controller, thread, active }
+# This is the architecture proposed for main.py — one GdbController per user,
+# isolated behind a lock so concurrent session creation is race-free.
+
+_sessions: dict = {}
+_sessions_lock = threading.Lock()
+
+
+def get_or_create_session(session_id: str) -> dict:
+    """
+    Return the session record for *session_id*, creating it if needed.
+
+    The check-then-create pattern is NOT atomic without a lock even in CPython:
+    two simultaneous requests can both pass the 'if not in' check and each
+    install its own record, so one caller ends up holding a dict that is no
+    longer in the store. The lock prevents this.
+
+    The record starts with no controller and no reader thread; on_start_debug
+    fills them in.
+    """
+    with _sessions_lock:
+        if session_id not in _sessions:
+            _sessions[session_id] = {
+                "controller": None,  # attached by on_start_debug
+                "thread": None,
+                "active": False,
+            }
+        return _sessions[session_id]
+
+
+# ── Background GDB reader thread ──────────────────────────────────────────────
+
+def _gdb_reader_thread(session_id: str, controller: GdbController) -> None:
+    """
+    The critical piece of the WebSocket architecture.
+
+    pygdbmi tags every parsed record with a type; the ones handled here are:
+      - 'result'  — synchronous response to a MI command (-break-insert, etc.)
+      - 'notify'  — async record GDB produces on its own (*stopped, =thread-created)
+      - 'console' / 'log' / 'target' / 'output' — stream text from GDB and
+        anything that is not MI-formatted (e.g. the inferior's stdout)
+
+    A naive implementation that only emits inside the @socketio.on('gdb_command')
+    handler will silently drop 'notify' records — including breakpoint hits —
+    because those arrive between commands, not in response to them.
+
+    This thread is the single consumer of GDB output for its controller and
+    emits every record type to the session's SocketIO room. It exits when the
+    session is removed, deactivated, or handed a different controller.
+    """
+    print(f"[reader] thread started for session {session_id[:8]}")
+
+    while True:
+        # Stop when the session is gone, inactive, or has been restarted with a
+        # new controller (otherwise a stale reader would keep polling a dead GDB).
+        with _sessions_lock:
+            sess = _sessions.get(session_id)
+            if (
+                not sess
+                or not sess.get("active")
+                or sess.get("controller") is not controller
+            ):
+                break
+
+        try:
+            responses = controller.get_gdb_response(
+                timeout_sec=0.1, raise_error_on_timeout=False
+            )
+        except Exception as e:
+            print(f"[reader] error reading from GDB: {e}")
+            break
+
+        for response in responses:
+            msg_type = response.get("type")
+            message = response.get("message", "")
+            payload = response.get("payload")
+
+            if msg_type == "notify" and message == "stopped":
+                # ── This is the async record the frontend cares most about ──
+                # It arrives here because GDB hit the breakpoint — the client
+                # sent NO command to trigger this. Polling would never catch it
+                # in real time. The reader thread does.
+                info = payload if isinstance(payload, dict) else {}
+                reason = info.get("reason", "unknown")
+                frame = info.get("frame", {})
+                socketio.emit("breakpoint_hit", {
+                    "reason": reason,
+                    "line": frame.get("line", "?"),
+                    "func": frame.get("func", "?"),
+                    "file": os.path.basename(frame.get("fullname", "?")),
+                    "variables": _get_locals(controller),
+                }, room=session_id)
+                print(f"[reader] emitted breakpoint_hit to room {session_id[:8]} — {reason} at line {frame.get('line','?')}")
+
+            elif msg_type == "notify":
+                # Emit all other async records too (thread events, library loads)
+                socketio.emit("gdb_async", {
+                    "message": message,
+                    "payload": str(payload)[:200],
+                }, room=session_id)
+
+            elif msg_type in ("console", "log", "target", "output"):
+                # GDB stream output, plus pygdbmi's 'output' records, which is
+                # how text written by the inferior program shows up.
+                socketio.emit("program_output", {
+                    "text": str(payload),
+                }, room=session_id)
+
+            elif msg_type == "result":
+                # Result of a command written by a handler; handlers do not read
+                # responses themselves, so this is where the client hears back.
+                socketio.emit("gdb_result", {
+                    "message": message,
+                    "payload": str(payload)[:200],
+                }, room=session_id)
+
+
+def _get_locals(controller: GdbController) -> dict:
+    """
+    Fetch local variables at the current frame as a {name: value} dict.
+
+    Called from the reader thread, so reading the response here does not
+    compete with another reader. Returns {} when GDB reports no usable result
+    or the request fails; a variable without a simple value maps to "?".
+    """
+    try:
+        resp = controller.write("-stack-list-locals --simple-values", timeout_sec=2)
+    except Exception as e:
+        # Best effort: a breakpoint event without variables beats no event.
+        print(f"[reader] could not fetch locals: {e}")
+        return {}
+
+    for r in resp:
+        if r.get("type") == "result" and r.get("message") == "done":
+            payload = r.get("payload")
+            locals_list = payload.get("locals", []) if isinstance(payload, dict) else []
+            return {
+                v["name"]: v.get("value", "?")
+                for v in locals_list
+                if isinstance(v, dict) and "name" in v
+            }
+    return {}
+
+
+# ── SocketIO event handlers ───────────────────────────────────────────────────
+
+@socketio.on("connect")
+def on_connect():
+    """Put the connecting client into the room named after its session id."""
+    sid = session.get("sid") or "anon"
+    join_room(sid)
+    print(f"[connect] client joined room {sid[:8]}")
+    emit("connected", {"session_id": sid[:8]})
+
+
+@socketio.on("start_debug")
+def on_start_debug(data):
+    """
+    Client requests a new debug session.
+
+    Creates an isolated GdbController, sets a breakpoint, and starts the
+    background reader thread before running the program.
+    """
+    sid = session.get("sid", "anon")
+    sess = get_or_create_session(sid)
+
+    # Tear down any existing GDB session for this user
+    if sess.get("controller"):
+        try:
+            sess["controller"].exit()
+        except Exception:
+            # Best effort: the old GDB may already be dead.
+            pass
+        sess["active"] = False
+
+    # Create a fresh isolated GdbController for this session. Replacing the
+    # controller also tells any previous reader thread to stop.
+    controller = GdbController()
+    sess["controller"] = controller
+    sess["active"] = True
+
+    # Load the binary (no reader thread yet, so reading the reply here is safe)
+    controller.write(f"-file-exec-and-symbols {TARGET_BINARY_GDB}", timeout_sec=3)
+
+    # Set the breakpoint by line number in the default source file
+    bp_resp = controller.write(f"-break-insert {BREAKPOINT_LINE}", timeout_sec=3)
+    bp_ok = any(r.get("message") == "done" for r in bp_resp)
+
+    emit("session_ready", {
+        "binary": os.path.basename(TARGET_BINARY),
+        "breakpoint": BREAKPOINT_LINE,
+        "bp_set": bp_ok,
+    })
+    print(f"[start_debug] session ready for {sid[:8]}, breakpoint set={bp_ok}")
+
+    # Start the background reader thread BEFORE running the program.
+    # This is essential: if we run first and start the reader after,
+    # we can miss the *stopped record entirely.
+    reader = threading.Thread(
+        target=_gdb_reader_thread,
+        args=(sid, controller),
+        daemon=True,
+        name=f"gdb-reader-{sid[:8]}",
+    )
+    sess["thread"] = reader
+    reader.start()
+
+    # Run the program. Write WITHOUT reading the response: if this handler read
+    # it, it would race the reader thread and could swallow the *stopped record.
+    controller.write("-exec-run", read_response=False)
+    emit("program_running", {"message": "Program running — waiting for breakpoint..."})
+
+
+@socketio.on("gdb_command")
+def on_gdb_command(data):
+    """
+    Forward an arbitrary MI command from the frontend to this session's GDB.
+
+    The command is written without reading a response; the reader thread is the
+    only consumer of GDB output and emits the resulting 'gdb_result' (and any
+    async records) to the session room. Emits 'error' when there is no active
+    session, the command is empty, or the write fails.
+
+    NOTE(review): the command string is passed to GDB unfiltered, which lets a
+    client run anything GDB can run — acceptable for a local prototype only.
+    """
+    sid = session.get("sid", "anon")
+    with _sessions_lock:
+        sess = _sessions.get(sid)
+
+    if not sess or not sess.get("controller"):
+        emit("error", {"message": "No active debug session"})
+        return
+
+    # Tolerate a missing or malformed payload instead of raising in the handler.
+    request = data if isinstance(data, dict) else {}
+    command = str(request.get("command", "")).strip()
+    if not command:
+        emit("error", {"message": "Empty GDB command"})
+        return
+    print(f"[command] {sid[:8]} -> {command}")
+
+    try:
+        sess["controller"].write(command, read_response=False)
+    except Exception as e:
+        emit("error", {"message": str(e)})
+
+
+@socketio.on("disconnect")
+def on_disconnect():
+    """Deactivate and remove the client's session, shutting down its GDB process."""
+    sid = session.get("sid", "anon")
+    with _sessions_lock:
+        sess = _sessions.get(sid)
+        if sess:
+            # Clearing 'active' makes the reader thread exit on its next pass.
+            sess["active"] = False
+            ctrl = sess.get("controller")
+            if ctrl:
+                try:
+                    ctrl.exit()
+                except Exception:
+                    # Best effort: GDB may already have exited.
+                    pass
+            del _sessions[sid]
+    print(f"[disconnect] cleaned up session {sid[:8]}")
+
+
+# ── Frontend ──────────────────────────────────────────────────────────────────
+
+HTML = """
+
+
+
+GDB-UI WebSocket Prototype
+
+
+
+
+

GDB-UI — WebSocket Prototype

+ disconnected + Proof of concept for GSoC 2026 proposal +
+ +
+ +
+ + + + + + + Click “Start debug session” to begin +
+ + +
+
+ Event log + 0 events +
+
+
+ + +
+
+ Local variables + +
+
+ Variables will appear when a breakpoint is hit +
+
+ + +
+
Source — target.c
+
+
{{ source }}
+
+
+ + +
+
+ GDB output + +
+
+
+ +
+
+
+
+
+
+"""
+
+
+@app.route("/")
+def index():
+    """
+    Serve the single-page frontend.
+
+    Assigns the browser a session id on first visit (used as the SocketIO room
+    name), then fills the HTML template with the breakpoint line and the
+    HTML-escaped text of target.c.
+    """
+    import uuid
+    from html import escape
+
+    # Assign a stable session ID per browser session
+    if "sid" not in session:
+        session["sid"] = str(uuid.uuid4())
+
+    # Read the C source to render in the frontend
+    src_path = os.path.join(os.path.dirname(__file__), "target.c")
+    try:
+        with open(src_path, encoding="utf-8") as f:
+            source = f.read()
+    except FileNotFoundError:
+        source = "/* target.c not found — run: gcc -g -O0 -o target target.c */"
+
+    # Substitute the breakpoint first so placeholder-looking text inside the C
+    # source is never rewritten, and escape the source so `<stdio.h>` and `&`
+    # are shown as text instead of being parsed as markup.
+    page = HTML.replace("{{ BREAKPOINT_LINE }}", str(BREAKPOINT_LINE))
+    page = page.replace("{{ source }}", escape(source))
+    return page
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":
+    binary = TARGET_BINARY
+    if not os.path.exists(binary):
+        print("ERROR: target binary not found.")
+        print("Run: gcc -g -O0 -o target target.c")
+        raise SystemExit(1)
+
+    print("=" * 60)
+    print("GDB-UI WebSocket Proof-of-Concept")
+    print("=" * 60)
+    print(f" Target binary : {binary}")
+    print(f" Breakpoint : target.c line {BREAKPOINT_LINE}")
+    print(f" URL : http://localhost:5000")
+    print()
+    print("What to observe:")
+    print(" 1. Click 'Start debug session'")
+    print(" 2. Watch the BREAKPOINT HIT event arrive in the event log")
+    print(" — no command was sent to trigger it")
+    print(" 3. Local variables appear automatically (x=10, y=20)")
+    print(f" 4. Line {BREAKPOINT_LINE} highlights in the source panel")
+    print(" 5. Click 'Continue' or 'Step over' to resume")
+    print("=" * 60)
+
+    # NOTE(review): 0.0.0.0 exposes an unauthenticated debugger on every
+    # interface; set GDB_UI_HOST=127.0.0.1 to keep it local. Default unchanged.
+    socketio.run(app, host=os.environ.get("GDB_UI_HOST", "0.0.0.0"), port=5000, debug=False)
diff --git a/prototype/target.c b/prototype/target.c
new file mode 100644
index 0000000..98752e2
--- /dev/null
+++ b/prototype/target.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+int add(int a, int b) {
+    return a + b;
+}
+
+int main() {
+    int x = 10;
+    int y = 20;
+    int result = add(x, y); /* breakpoint will land here */
+    printf("result = %d\n", result);
+    return 0;
+}
diff --git a/prototype/target.exe b/prototype/target.exe
new file mode 100644
index 0000000..9ec7024
Binary files /dev/null and b/prototype/target.exe differ