From a660a36968a7006cc1f2f3764e4ec106a7348f6a Mon Sep 17 00:00:00 2001 From: Kostas Karachalios Date: Fri, 20 Feb 2026 20:00:02 +0100 Subject: [PATCH 1/3] fix: add 429 rate-limit retry with countdown to HTTP transport When the Anthropic API returns HTTP 429, the app now retries up to 5 times honoring the Retry-After header, with a visible per-second countdown via on_retry callback. Both _http_stream_sse (streaming) and _http_json (model listing) are covered. Non-429 errors still raise immediately. Connection-timeout retries remain independent. --- agent/engine.py | 5 + agent/model.py | 157 +++++-- ...2-20-429-rate-limit-handling-brainstorm.md | 62 +++ ...026-02-20-fix-429-rate-limit-retry-plan.md | 437 ++++++++++++++++++ tests/conftest.py | 4 +- tests/test_model_complex.py | 6 +- tests/test_rate_limit.py | 369 +++++++++++++++ 7 files changed, 994 insertions(+), 46 deletions(-) create mode 100644 docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md create mode 100644 docs/plans/2026-02-20-fix-429-rate-limit-retry-plan.md create mode 100644 tests/test_rate_limit.py diff --git a/agent/engine.py b/agent/engine.py index 8bd2b65a..f04845df 100644 --- a/agent/engine.py +++ b/agent/engine.py @@ -325,6 +325,9 @@ def _solve_recursive( # Stream thinking/text deltas only for top-level calls if on_content_delta and depth == 0 and hasattr(model, "on_content_delta"): model.on_content_delta = on_content_delta + # Rate-limit messages fire at all depths + if on_event and hasattr(model, "on_retry"): + model.on_retry = lambda msg: self._emit(msg, on_event) try: turn = model.complete(conversation) except ModelError as exc: @@ -333,6 +336,8 @@ def _solve_recursive( finally: if hasattr(model, "on_content_delta"): model.on_content_delta = None + if hasattr(model, "on_retry"): + model.on_retry = None elapsed = time.monotonic() - t0 if replay_logger: diff --git a/agent/model.py b/agent/model.py index b82e5f28..cdebd7b8 100644 --- a/agent/model.py +++ b/agent/model.py @@ -2,6 +2,7 @@ import json import socket +import time import urllib.error import urllib.request from dataclasses import dataclass, field @@ -95,37 +96,83 @@ def _extract_content(content: object) -> str: return "" +def _parse_retry_after(exc: urllib.error.HTTPError, default: int = 5) -> int: + """Parse Retry-After header from an HTTPError, clamped to [1, 120].""" + raw = exc.headers.get("Retry-After") + if raw is None: + return default + try: + return max(1, min(120, int(raw))) + except (ValueError, TypeError): + return default + + +def _notify_retry(on_retry: "Callable[[str], None] | None", msg: str) -> None: + """Fire on_retry callback, swallowing any exception.""" + if on_retry is not None: + try: + on_retry(msg) + except Exception: + pass + + +def _sleep_with_countdown( + seconds: int, + attempt: int, + max_attempts: int, + on_retry: "Callable[[str], None] | None", +) -> None: + """Sleep for *seconds*, firing on_retry with a countdown each second.""" + for remaining in range(seconds, 0, -1): + _notify_retry( + on_retry, + f"Rate limited (attempt {attempt}/{max_attempts}). " + f"Retrying in {remaining}s...", + ) + time.sleep(1) + + def _http_json( url: str, method: str, headers: dict[str, str], payload: dict[str, Any] | None = None, timeout_sec: int = 90, + max_rate_limit_retries: int = 5, ) -> dict[str, Any]: - req = urllib.request.Request( - url=url, - data=(json.dumps(payload).encode("utf-8") if payload is not None else None), - headers=headers, - method=method, - ) - try: - with urllib.request.urlopen(req, timeout=timeout_sec) as resp: - raw = resp.read().decode("utf-8", errors="replace") - except urllib.error.HTTPError as exc: # pragma: no cover - network path - body = exc.read().decode("utf-8", errors="replace") - raise ModelError(f"HTTP {exc.code} calling {url}: {body}") from exc - except urllib.error.URLError as exc: # pragma: no cover - network path - raise ModelError(f"Connection error calling {url}: {exc}") from exc - except OSError as exc: # pragma: no cover - bare socket.timeout, etc. - raise ModelError(f"Network error calling {url}: {exc}") from exc + for rate_limit_attempt in range(1, max_rate_limit_retries + 1): + req = urllib.request.Request( + url=url, + data=(json.dumps(payload).encode("utf-8") if payload is not None else None), + headers=headers, + method=method, + ) + try: + with urllib.request.urlopen(req, timeout=timeout_sec) as resp: + raw = resp.read().decode("utf-8", errors="replace") + except urllib.error.HTTPError as exc: + if exc.code == 429: + retry_after = _parse_retry_after(exc) + _sleep_with_countdown(retry_after, rate_limit_attempt, max_rate_limit_retries, None) + continue + body = exc.read().decode("utf-8", errors="replace")[:8192] + raise ModelError(f"HTTP {exc.code} calling {url}: {body}") from exc + except urllib.error.URLError as exc: # pragma: no cover - network path + raise ModelError(f"Connection error calling {url}: {exc}") from exc + except OSError as exc: # pragma: no cover - bare socket.timeout, etc. + raise ModelError(f"Network error calling {url}: {exc}") from exc - try: - parsed = json.loads(raw) - except json.JSONDecodeError as exc: - raise ModelError(f"Non-JSON response from {url}: {raw[:500]}") from exc - if not isinstance(parsed, dict): - raise ModelError(f"Unexpected non-object JSON from {url}: {type(parsed)!r}") - return parsed + try: + parsed = json.loads(raw) + except json.JSONDecodeError as exc: + raise ModelError(f"Non-JSON response from {url}: {raw[:500]}") from exc + if not isinstance(parsed, dict): + raise ModelError(f"Unexpected non-object JSON from {url}: {type(parsed)!r}") + return parsed + + raise ModelError( + f"HTTP 429 calling {url}: rate limited, exhausted {max_rate_limit_retries} retries" + ) def _extend_socket_timeout(resp: Any, timeout: float) -> None: @@ -212,32 +259,54 @@ def _http_stream_sse( stream_timeout: float = 120, max_retries: int = 3, on_sse_event: "Callable[[str, dict[str, Any]], None] | None" = None, + on_retry: "Callable[[str], None] | None" = None, + max_rate_limit_retries: int = 5, ) -> list[tuple[str, dict[str, Any]]]: """Stream an SSE endpoint with first-byte timeout and retry logic.""" data = json.dumps(payload).encode("utf-8") - last_exc: Exception | None = None - for attempt in range(max_retries): - req = urllib.request.Request(url=url, data=data, headers=headers, method=method) - try: - resp = urllib.request.urlopen(req, timeout=first_byte_timeout) - except urllib.error.HTTPError as exc: - body = exc.read().decode("utf-8", errors="replace") - raise ModelError(f"HTTP {exc.code} calling {url}: {body}") from exc - except (socket.timeout, urllib.error.URLError, OSError) as exc: - # Timeout or connection error — retry - last_exc = exc + for rate_limit_attempt in range(1, max_rate_limit_retries + 1): + got_429 = False + retry_after = 0 + + last_exc: Exception | None = None + for attempt in range(max_retries): + req = urllib.request.Request(url=url, data=data, headers=headers, method=method) + try: + resp = urllib.request.urlopen(req, timeout=first_byte_timeout) + except urllib.error.HTTPError as exc: + if exc.code == 429: + retry_after = _parse_retry_after(exc) + got_429 = True + break + body = exc.read().decode("utf-8", errors="replace")[:8192] + raise ModelError(f"HTTP {exc.code} calling {url}: {body}") from exc + except (socket.timeout, urllib.error.URLError, OSError) as exc: + # Timeout or connection error — retry + last_exc = exc + continue + + # First byte received — extend timeout for the rest of the stream + _extend_socket_timeout(resp, stream_timeout) + try: + return _read_sse_events(resp, on_sse_event=on_sse_event) + finally: + resp.close() + else: + if not got_429: + raise ModelError( + f"Timed out after {max_retries} attempts calling {url}: {last_exc}" + ) + + if got_429: + _sleep_with_countdown(retry_after, rate_limit_attempt, max_rate_limit_retries, on_retry) continue - # First byte received — extend timeout for the rest of the stream - _extend_socket_timeout(resp, stream_timeout) - try: - return _read_sse_events(resp, on_sse_event=on_sse_event) - finally: - resp.close() + # If we reach here without got_429 and without returning, something went wrong + break # pragma: no cover raise ModelError( - f"Timed out after {max_retries} attempts calling {url}: {last_exc}" + f"HTTP 429 calling {url}: rate limited, exhausted {max_rate_limit_retries} retries" ) @@ -577,6 +646,7 @@ class OpenAICompatibleModel: strict_tools: bool = True tool_defs: list[dict[str, Any]] | None = None on_content_delta: Callable[[str, str], None] | None = None + on_retry: Callable[[str], None] | None = None def _is_reasoning_model(self) -> bool: """OpenAI reasoning models (o-series, gpt-5 series) have different API constraints.""" @@ -656,6 +726,7 @@ def _forward_delta(_event_type: str, data: dict[str, Any]) -> None: payload=payload, stream_timeout=self.timeout_sec, on_sse_event=sse_cb, + on_retry=self.on_retry, ) parsed = _accumulate_openai_stream(events) except ModelError as exc: @@ -675,6 +746,7 @@ def _forward_delta(_event_type: str, data: dict[str, Any]) -> None: payload=payload, stream_timeout=self.timeout_sec, on_sse_event=sse_cb, + on_retry=self.on_retry, ) parsed = _accumulate_openai_stream(events) @@ -771,6 +843,7 @@ class AnthropicModel: timeout_sec: int = 300 tool_defs: list[dict[str, Any]] | None = None on_content_delta: Callable[[str, str], None] | None = None + on_retry: Callable[[str], None] | None = None def create_conversation(self, system_prompt: str, initial_user_message: str) -> Conversation: messages: list[Any] = [ @@ -849,6 +922,7 @@ def _forward_delta(_event_type: str, data: dict[str, Any]) -> None: payload=payload, stream_timeout=self.timeout_sec, on_sse_event=sse_cb, + on_retry=self.on_retry, ) parsed = _accumulate_anthropic_stream(events) except ModelError as exc: @@ -869,6 +943,7 @@ def _forward_delta(_event_type: str, data: dict[str, Any]) -> None: payload=payload, stream_timeout=self.timeout_sec, on_sse_event=sse_cb, + on_retry=self.on_retry, ) parsed = _accumulate_anthropic_stream(events) diff --git a/docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md b/docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md new file mode 100644 index 00000000..5275b42c --- /dev/null +++ b/docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md @@ -0,0 +1,62 @@ +# Brainstorm: Graceful 429 Rate Limit Handling + +**Date:** 2026-02-20 +**Status:** Ready for planning + +## What We're Building + +Retry logic for HTTP 429 (rate limit) responses from the Anthropic API. When the +app hits a rate limit, it should wait using the `Retry-After` header and retry +automatically — up to 5 attempts — with a visible countdown in the TUI so the +user knows what's happening. + +## Why This Approach + +**Problem:** The app currently treats 429 as a fatal error. `_http_stream_sse()` +catches `HTTPError` and immediately raises `ModelError`, which terminates the +engine's solve loop. The user sees an error and has to restart. + +**Approach A (inline retry)** was chosen over a retry decorator or model-layer +retry because: + +- The existing `_http_stream_sse()` already has a retry loop for connection + errors — extending it for 429 is natural +- Only two call sites need updating (`_http_stream_sse` and `_http_json`), which + doesn't justify a new abstraction +- Keeps HTTP retry concerns in the HTTP transport layer where they belong + +## Key Decisions + +1. **Scope: Anthropic 429 only (for now).** Other providers and other transient + errors (5xx) are out of scope. The shared code path means expanding later is + straightforward. + +2. **Retry strategy: Respect `Retry-After` header, max 5 attempts.** Anthropic + includes this header in 429 responses. We honor it directly rather than + implementing our own exponential backoff. + +3. **UX: Visible countdown.** During the wait, show a live countdown + (e.g., "Rate limited. Retrying in 8s... 7s... 6s...") in the TUI event stream + so the user knows the app hasn't hung. + +4. **Both call paths covered.** Retry logic goes in both `_http_stream_sse()` + (streaming LLM calls) and `_http_json()` (model listing). + +5. **Failure mode: ModelError as today.** After 5 failed attempts, raise the same + `ModelError` the app raises now. No new exception subclasses. + +6. **Implementation: Inline in transport functions.** No decorators, no new + abstractions. Extend the existing retry pattern. + +## Affected Code + +| File | Function | Change | +|------|----------|--------| +| `agent/model.py` | `_http_stream_sse()` | Add 429 detection in HTTPError handler, parse Retry-After, sleep with countdown, loop | +| `agent/model.py` | `_http_json()` | Same pattern | +| `agent/model.py` | Both functions' signatures | Add optional `on_retry` callback for countdown display | +| `agent/engine.py` | Call sites of above | Thread `on_event` through as `on_retry` callback | + +## Open Questions + +None — all questions resolved during brainstorming. diff --git a/docs/plans/2026-02-20-fix-429-rate-limit-retry-plan.md b/docs/plans/2026-02-20-fix-429-rate-limit-retry-plan.md new file mode 100644 index 00000000..2e6e3c99 --- /dev/null +++ b/docs/plans/2026-02-20-fix-429-rate-limit-retry-plan.md @@ -0,0 +1,437 @@ +--- +title: "fix: Graceful 429 rate limit retry with countdown" +type: fix +status: completed +date: 2026-02-20 +deepened: 2026-02-20 +brainstorm: docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md +--- + +# fix: Graceful 429 Rate Limit Retry with Countdown + +## Enhancement Summary + +**Deepened on:** 2026-02-20 +**Sections enhanced:** 6 +**Research agents used:** architecture-strategist, kieran-python-reviewer, +code-simplicity-reviewer, performance-oracle, security-sentinel, +pattern-recognition-specialist, best-practices-researcher, +framework-docs-researcher + +### Key Improvements + +1. Extract `_parse_retry_after()` and `_sleep_with_countdown()` as pure/reusable + helpers -- keeps the retry loop body clean and individually testable +2. `on_retry` fires at ALL depths (not depth-gated like `on_content_delta`) so + rate-limit messages always reach the TUI regardless of recursion depth +3. Truncate HTTP error response bodies to 8 KB before embedding in `ModelError` + messages to prevent memory issues from large error payloads + +### New Considerations Discovered + +- Anthropic uses HTTP 529 for `overloaded_error` (separate from 429 + `rate_limit_error`). Out of scope for v1 but the architecture accommodates it + trivially by adding `exc.code in (429, 529)`. +- The worst-case hang is 5 rate-limit retries x 120s cap = ~10 minutes per call. + Document as known limitation; a total wall-clock timeout can be added later. +- `on_retry` callback must be wrapped in try/except (matching `_emit` pattern) + so a broken callback never kills the retry loop. +- `import time` is not currently in `agent/model.py` and must be added. +- Tests must patch `time.sleep` via `unittest.mock.patch("agent.model.time.sleep")` + to avoid actually sleeping. + +## Overview + +When the app receives an HTTP 429 from the Anthropic API, it crashes the solve +loop. This plan adds retry logic with `Retry-After` header support and a visible +TUI countdown, so the user sees what's happening and the app recovers +automatically. + +## Problem Statement + +`_http_stream_sse()` catches `urllib.error.HTTPError` and immediately raises +`ModelError` regardless of status code. The engine's `_solve_recursive()` catches +that `ModelError` and terminates the task. The user sees an error and must +re-submit. Rate limits are transient -- the app should wait and retry. + +## Proposed Solution + +Add 429 detection and retry-with-countdown inline in both transport functions +(`_http_stream_sse` and `_http_json`). Honor the `Retry-After` header. Fire +status messages through an `on_retry` callback for TUI display. + +**Key design decisions (from brainstorm):** + +- **Generic 429 handling** in shared transport functions. The code checks + `exc.code == 429` regardless of provider. "Anthropic only" refers to testing + scope, not implementation restriction. +- **Separate retry budget** from connection-timeout retries. 429 retries (max 5) + and connection retries (existing max 3) are independent counters. +- **`on_retry` callback signature:** `Callable[[str], None]` -- receives a + human-readable status message. Matches the `on_event` pattern. The transport + function generates the string; the caller decides how to display it. +- **`on_retry` lifecycle:** Follows the `on_content_delta` pattern -- set as a + field on model instances, managed by the engine before/after `complete()`. +- **`_http_json()` retries silently** for now. No callback threading through the + 4-function model-listing chain. Countdown display is for streaming calls only. +- **No parallel worker coordination.** Each worker retries independently. + Document as a known limitation. + +## Technical Considerations + +### `Retry-After` header parsing + +- Parse as integer seconds only (Anthropic's format). If missing or unparseable, + fall back to **5 seconds**. +- **Floor:** 1 second. **Cap:** 120 seconds. Prevents tight loops and absurd waits. +- Access via `exc.headers.get("Retry-After")` on `urllib.error.HTTPError`. + +#### Research Insights + +**Anthropic API behavior (confirmed from official docs):** + +- 429 error body: `{"type": "error", "error": {"type": "rate_limit_error", "message": "..."}}` +- `Retry-After` header: integer seconds format (not HTTP-date) +- Additional headers on every response: `anthropic-ratelimit-requests-limit`, + `anthropic-ratelimit-requests-remaining`, `anthropic-ratelimit-requests-reset`, + and matching `tokens-*` variants. These can inform proactive throttling in a + future enhancement but are not needed for v1. +- HTTP 529 (`overloaded_error`) is a separate status code. Not handled in v1. + +**Implementation: Extract `_parse_retry_after()` as a pure function:** + +```python +def _parse_retry_after(exc: urllib.error.HTTPError, default: int = 5) -> int: + """Parse Retry-After header from an HTTPError, clamped to [1, 120].""" + raw = exc.headers.get("Retry-After") + if raw is None: + return default + try: + return max(1, min(120, int(raw))) + except (ValueError, TypeError): + return default +``` + +This is independently testable without mocking any HTTP machinery. + +### Retry loop architecture in `_http_stream_sse()` + +The existing function has a retry loop for connection errors. The 429 retry wraps +the entire existing attempt logic as an outer concern: + +``` +for rate_limit_attempt in range(max_rate_limit_retries): # new outer loop (5) + for attempt in range(max_retries): # existing inner loop (3) + try: + resp = urlopen(req, ...) + except HTTPError as exc: + if exc.code == 429: + break # break inner, trigger outer retry with sleep + raise ModelError(...) + except (timeout, URLError, OSError): + continue # existing connection retry + else: + # connection retries exhausted + raise ModelError(...) + if got_429: + sleep with countdown + continue # outer loop retry + return events # success +``` + +#### Research Insights + +**Nested loop sentinel pattern:** The `got_429` sentinel variable mediates +between the inner `break` and the outer `continue`. This is the simplest pattern +that keeps the two retry budgets independent. An alternative (extracting the +inner loop into `_urlopen_with_retries()`) was considered but adds a function +boundary without reducing complexity for the current scope. + +**Callback safety:** The `on_retry` callback must be wrapped in try/except to +prevent a broken callback from killing the retry loop: + +```python +def _notify_retry(on_retry: "Callable[[str], None] | None", msg: str) -> None: + if on_retry is not None: + try: + on_retry(msg) + except Exception: + pass # never let a callback kill the retry loop +``` + +**Sleep with countdown helper:** + +```python +def _sleep_with_countdown( + seconds: int, + attempt: int, + max_attempts: int, + on_retry: "Callable[[str], None] | None", +) -> None: + """Sleep for `seconds`, firing on_retry with a countdown each second.""" + for remaining in range(seconds, 0, -1): + _notify_retry( + on_retry, + f"Rate limited (attempt {attempt}/{max_attempts}). " + f"Retrying in {remaining}s...", + ) + time.sleep(1) +``` + +### Threading safety + +- `time.sleep()` blocks the calling thread. Safe on main thread (TUI refresh + runs on its own thread). Safe on worker threads (they're dedicated to their + task). +- `on_retry` callback fires from the sleeping thread. The existing `on_event` + callback is already called from worker threads (in `_run_one_tool`), so the + TUI's console locking handles concurrent output. +- `KeyboardInterrupt` during `time.sleep()` propagates naturally. The engine's + `finally` block clears `on_content_delta`/`on_retry`. Verify `_ThinkingDisplay` + stops cleanly. + +### TUI interaction + +- During a 429 wait, the `_ThinkingDisplay` ("Thinking...") may still be active. + The retry countdown fires through `on_event`, which appears in the event log. + This is adequate for v1 -- a more polished approach (stopping the thinking + display and replacing it with a countdown widget) can follow later. + +### Security considerations + +#### Research Insights + +- **Truncate error response bodies:** HTTP error responses can contain large + payloads. Cap at 8 KB before embedding in `ModelError` messages to prevent + memory issues. Apply in both `_http_stream_sse` and `_http_json`. +- **Keep `on_retry` messages application-controlled:** The retry message strings + are generated by our code, not echoed from the server response. This prevents + any server-supplied content from reaching the TUI unescaped. +- **No credentials in error messages:** The existing pattern already strips + `Authorization` headers from error context. No changes needed. + +## System-Wide Impact + +- **Interaction graph:** `_http_stream_sse()` is called by + `OpenAICompatibleModel.complete()` and `AnthropicModel.complete()`, both of + which have a fallback retry path for unsupported parameters. All 4 call sites + (2 primary + 2 fallback) need `on_retry` threaded through. +- **Error propagation:** After 5 retries, `ModelError` propagates exactly as + today. The error message includes the retry count for debuggability. Format: + `"HTTP 429 calling {url}: rate limited, exhausted {N} retries"` (matches + existing `"HTTP {code} calling {url}: {detail}"` pattern). +- **State lifecycle risks:** None. The retry loop is stateless -- it only reads + the `Retry-After` header and sleeps. No persistent state is created or modified. +- **API surface parity:** `_http_json()` gets the same retry logic but without + the callback (silent retry). `_http_stream_sse()` gets retry + callback. +- **Test helpers:** `mock_openai_stream` and `mock_anthropic_stream` in + `conftest.py` must accept the new `on_retry` parameter. + +#### Research Insights + +- **`on_retry` depth gating:** Unlike `on_content_delta` (which only fires at + `depth == 0`), `on_retry` should fire at ALL depths. Rate-limit messages are + operational status, not content -- the user should always see them regardless + of recursion depth. Wire it unconditionally in `_solve_recursive()`. +- **`finally` block parity:** The `finally` block in `_solve_recursive()` must + clear both `on_content_delta` and `on_retry`. Use the existing `hasattr` guard + pattern. +- **Worst-case timing:** 5 rate-limit retries x 120s cap = ~10 minutes max hang + per single `_http_stream_sse` call. With the inner connection retry loop, the + theoretical ceiling is 5 x (3 connection attempts + 120s sleep) but in practice + connection retries are fast failures. Document as known limitation. + +## Acceptance Criteria + +### Functional + +- [x] 429 with `Retry-After: N` header triggers automatic retry after N seconds +- [x] Missing `Retry-After` header falls back to 5-second wait +- [x] `Retry-After` values are clamped to [1, 120] seconds +- [x] Max 5 retry attempts before raising `ModelError` +- [x] `on_retry` callback receives messages like `"Rate limited (attempt 2/5). Retrying in 8s..."` +- [x] Countdown ticks once per second (`"...7s"`, `"...6s"`, etc.) +- [x] Non-429 HTTP errors still raise immediately (no regression) +- [x] Connection-timeout retries still work independently of 429 retries +- [x] `_http_json()` retries 429 silently (no callback, same retry logic) +- [x] After exhausting retries, error message includes retry count +- [x] Parameter-fallback retry paths (`reasoning_effort`, `thinking`) also get + 429 protection on their second `_http_stream_sse()` call + +### Non-Functional + +- [x] Thread-safe: parallel workers can retry independently without corruption +- [x] `KeyboardInterrupt` during sleep cancels cleanly (no TUI corruption) +- [x] No new dependencies (stdlib `time.sleep` only) +- [x] Error response bodies truncated to 8 KB in `ModelError` messages +- [x] `on_retry` callback failures never kill the retry loop + +## MVP + +### Step 1: Add 429 retry to `_http_stream_sse()` + +**`agent/model.py`** + +Add `import time` at top of file. Add `_parse_retry_after()`, +`_notify_retry()`, and `_sleep_with_countdown()` as module-level helpers. Add +`on_retry` and `max_rate_limit_retries` parameters to `_http_stream_sse()`. +Restructure the retry loop with an outer 429-retry layer. + +```python +# agent/model.py - new helpers (above _http_stream_sse) + +def _parse_retry_after(exc: urllib.error.HTTPError, default: int = 5) -> int: + """Parse Retry-After header from an HTTPError, clamped to [1, 120].""" + raw = exc.headers.get("Retry-After") + if raw is None: + return default + try: + return max(1, min(120, int(raw))) + except (ValueError, TypeError): + return default + + +def _notify_retry(on_retry: "Callable[[str], None] | None", msg: str) -> None: + """Fire on_retry callback, swallowing any exception.""" + if on_retry is not None: + try: + on_retry(msg) + except Exception: + pass + + +def _sleep_with_countdown( + seconds: int, + attempt: int, + max_attempts: int, + on_retry: "Callable[[str], None] | None", +) -> None: + """Sleep for `seconds`, firing on_retry with a countdown each second.""" + for remaining in range(seconds, 0, -1): + _notify_retry( + on_retry, + f"Rate limited (attempt {attempt}/{max_attempts}). " + f"Retrying in {remaining}s...", + ) + time.sleep(1) +``` + +```python +# agent/model.py - _http_stream_sse signature +def _http_stream_sse( + url: str, + method: str, + headers: dict[str, str], + payload: dict[str, Any], + first_byte_timeout: float = 10, + stream_timeout: float = 120, + max_retries: int = 3, + on_sse_event: "Callable[[str, dict[str, Any]], None] | None" = None, + on_retry: "Callable[[str], None] | None" = None, # NEW + max_rate_limit_retries: int = 5, # NEW +) -> list[tuple[str, dict[str, Any]]]: +``` + +### Step 2: Add 429 retry to `_http_json()` + +**`agent/model.py`** + +Add a retry loop to `_http_json()` for 429 responses. Silent (no callback) since +the model-listing call chain doesn't pass callbacks. + +```python +# agent/model.py - _http_json with retry loop +def _http_json( + url: str, + method: str, + headers: dict[str, str], + payload: dict[str, Any] | None = None, + timeout_sec: int = 90, + max_rate_limit_retries: int = 5, # NEW +) -> dict[str, Any]: +``` + +### Step 3: Thread `on_retry` through model classes + +**`agent/model.py`** + +Add `on_retry: Callable[[str], None] | None = None` field to both +`OpenAICompatibleModel` and `AnthropicModel` (alongside existing +`on_content_delta`). Pass it through to `_http_stream_sse()` in `complete()` -- +all 4 call sites (2 primary + 2 fallback). + +### Step 4: Wire `on_retry` in the engine + +**`agent/engine.py`** + +Set `model.on_retry` in `_solve_recursive()` before calling `model.complete()`, +and clear it in the `finally` block. Wire it **unconditionally** (not gated by +`depth == 0` like `on_content_delta`) so rate-limit messages always reach the TUI. + +```python +# engine.py - in _solve_recursive, alongside on_content_delta setup +# NOTE: on_retry fires at ALL depths (not depth-gated) +model.on_retry = (lambda msg: self._emit(msg, on_event)) if on_event else None +``` + +```python +# engine.py - in finally block, clear both callbacks +if hasattr(model, "on_content_delta"): + model.on_content_delta = None +if hasattr(model, "on_retry"): + model.on_retry = None +``` + +### Step 5: Update test helpers + +**`tests/conftest.py`** + +Update `mock_openai_stream` and `mock_anthropic_stream` wrapper signatures to +accept `on_retry=None` and `max_rate_limit_retries=5`. + +### Step 6: Write tests (TDD -- these come first in practice) + +**`tests/test_rate_limit.py`** (new file) + +Tests must patch `time.sleep` via `unittest.mock.patch("agent.model.time.sleep")` +to avoid actually sleeping during test runs. + +- `test_parse_retry_after_with_valid_header` +- `test_parse_retry_after_missing_header_uses_default` +- `test_parse_retry_after_clamped_to_bounds` +- `test_parse_retry_after_non_integer_uses_default` +- `test_429_retries_and_succeeds` +- `test_429_exhausts_retries_raises_model_error` +- `test_429_on_retry_callback_invoked_with_countdown` +- `test_non_429_http_error_raises_immediately` (regression guard) +- `test_429_in_http_json_retries_silently` +- `test_connection_retry_and_429_retry_are_independent` +- `test_notify_retry_swallows_callback_exceptions` + +## Known Limitations + +- **Parallel worker stampede:** Multiple `ThreadPoolExecutor` workers can hit 429 + simultaneously and all retry at the same Retry-After expiry. No coordination + between workers. Acceptable for v1; consider adding jitter later. +- **`_ThinkingDisplay` overlap:** During a 429 wait, the thinking spinner + continues showing "Thinking..." alongside the rate-limit event message. A + dedicated countdown widget can be added in a follow-up. +- **`_http_json()` retries silently:** No countdown for `/model list`. Acceptable + since model listing is a quick, infrequent operation. +- **SSE-level rate limit errors** (Anthropic `overloaded_error` in-stream) are + not covered. Only HTTP-level 429 is handled. +- **HTTP 529 (`overloaded_error`)** is not retried. The architecture supports + adding `exc.code in (429, 529)` trivially in a follow-up. +- **Worst-case hang:** 5 retries x 120s cap = ~10 minutes per call with no + total wall-clock timeout. Acceptable for v1; a cumulative timeout can be added. + +## References + +- Brainstorm: `docs/brainstorms/2026-02-20-429-rate-limit-handling-brainstorm.md` +- Transport functions: `agent/model.py:98` (`_http_json`), `agent/model.py:206` + (`_http_stream_sse`) +- Engine error handling: `agent/engine.py:329` +- Existing retry tests: `tests/test_streaming.py:195` +- Test helpers: `tests/conftest.py:164` +- Anthropic rate limits docs: https://docs.anthropic.com/en/api/rate-limits +- Anthropic error types: https://docs.anthropic.com/en/api/errors diff --git a/tests/conftest.py b/tests/conftest.py index 85b5ac48..83107f72 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -163,7 +163,7 @@ def _anthropic_dict_to_events( def mock_openai_stream(fake_http_json_fn): """Wrap a _http_json-style mock into a _http_stream_sse-style mock for OpenAI.""" - def wrapper(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None): + def wrapper(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None, on_retry=None, max_rate_limit_retries=5): result = fake_http_json_fn(url, method, headers, payload=payload, timeout_sec=stream_timeout) return _openai_dict_to_events(result) return wrapper @@ -171,7 +171,7 @@ def wrapper(url, method, headers, payload, first_byte_timeout=10, stream_timeout def mock_anthropic_stream(fake_http_json_fn): """Wrap a _http_json-style mock into a _http_stream_sse-style mock for Anthropic.""" - def wrapper(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None): + def wrapper(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None, on_retry=None, max_rate_limit_retries=5): result = fake_http_json_fn(url, method, headers, payload=payload, timeout_sec=stream_timeout) return _anthropic_dict_to_events(result) return wrapper diff --git a/tests/test_model_complex.py b/tests/test_model_complex.py index 3e6a26c2..e8165cf8 100644 --- a/tests/test_model_complex.py +++ b/tests/test_model_complex.py @@ -116,7 +116,7 @@ def test_sorted_models_ordering(self) -> None: # 17-19. OpenAICompatibleModel error paths # ------------------------------------------------------------------ # def test_openai_missing_content_raises(self) -> None: - def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None): + def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None, on_retry=None, max_rate_limit_retries=5): # Return events that accumulate to empty choices return [("", {"choices": [{"delta": {}, "finish_reason": "stop"}]})] @@ -515,7 +515,7 @@ def test_openai_create_conversation_stores_system_prompt(self) -> None: # 37. OpenAI non-retryable ModelError re-raised # ------------------------------------------------------------------ # def test_openai_non_retryable_error_raised(self) -> None: - def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None): + def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None, on_retry=None, max_rate_limit_retries=5): raise ModelError("HTTP 500 server error") with patch("agent.model._http_stream_sse", fake_stream_sse): @@ -747,7 +747,7 @@ def fake_http_json(url, method, headers, payload=None, timeout_sec=90): # 49. Anthropic non-retryable ModelError re-raised # ------------------------------------------------------------------ # def test_anthropic_non_retryable_error_raised(self) -> None: - def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None): + def fake_stream_sse(url, method, headers, payload, first_byte_timeout=10, stream_timeout=120, max_retries=3, on_sse_event=None, on_retry=None, max_rate_limit_retries=5): raise ModelError("HTTP 500 server error") with patch("agent.model._http_stream_sse", fake_stream_sse): diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py new file mode 100644 index 00000000..214ea4be --- /dev/null +++ b/tests/test_rate_limit.py @@ -0,0 +1,369 @@ +"""Tests for HTTP 429 rate-limit retry logic in agent.model.""" +from __future__ import annotations + +import io +import socket +import unittest +import urllib.error +from unittest.mock import MagicMock, call, patch + +from agent.model import ( + ModelError, + _http_json, + _http_stream_sse, + _notify_retry, + _parse_retry_after, + _sleep_with_countdown, +) + + +# --------------------------------------------------------------------------- +# _parse_retry_after +# --------------------------------------------------------------------------- + + +class ParseRetryAfterTests(unittest.TestCase): + """Tests for the _parse_retry_after pure helper.""" + + def _make_exc(self, retry_after: str | None) -> urllib.error.HTTPError: + headers = {} + if retry_after is not None: + headers["Retry-After"] = retry_after + exc = urllib.error.HTTPError( + url="http://test", + code=429, + msg="Too Many Requests", + hdrs=headers, + fp=io.BytesIO(b"{}"), + ) + return exc + + def test_valid_integer_header(self) -> None: + exc = self._make_exc("10") + self.assertEqual(_parse_retry_after(exc), 10) + + def test_missing_header_uses_default(self) -> None: + exc = self._make_exc(None) + self.assertEqual(_parse_retry_after(exc), 5) + + def test_missing_header_custom_default(self) -> None: + exc = self._make_exc(None) + self.assertEqual(_parse_retry_after(exc, default=30), 30) + + def test_clamped_below_floor(self) -> None: + exc = self._make_exc("0") + self.assertEqual(_parse_retry_after(exc), 1) + + def test_clamped_above_cap(self) -> None: + exc = self._make_exc("999") + self.assertEqual(_parse_retry_after(exc), 120) + + def test_non_integer_uses_default(self) -> None: + exc = self._make_exc("not-a-number") + self.assertEqual(_parse_retry_after(exc), 5) + + def test_negative_value_clamped_to_floor(self) -> None: + exc = self._make_exc("-5") + self.assertEqual(_parse_retry_after(exc), 1) + + +# --------------------------------------------------------------------------- +# _notify_retry +# --------------------------------------------------------------------------- + + +class NotifyRetryTests(unittest.TestCase): + """Tests for the _notify_retry callback wrapper.""" + + def test_calls_callback_with_message(self) -> None: + cb = MagicMock() + _notify_retry(cb, "test message") + cb.assert_called_once_with("test message") + + def test_none_callback_is_noop(self) -> None: + # Should not raise + _notify_retry(None, "test message") + + def test_swallows_callback_exception(self) -> None: + def broken_cb(msg: str) -> None: + raise RuntimeError("boom") + + # Should not raise + _notify_retry(broken_cb, "test message") + + +# --------------------------------------------------------------------------- +# _sleep_with_countdown +# --------------------------------------------------------------------------- + + +class SleepWithCountdownTests(unittest.TestCase): + """Tests for the _sleep_with_countdown helper.""" + + @patch("agent.model.time.sleep") + def test_sleeps_correct_number_of_seconds(self, mock_sleep: MagicMock) -> None: + cb = MagicMock() + _sleep_with_countdown(seconds=3, attempt=1, max_attempts=5, on_retry=cb) + self.assertEqual(mock_sleep.call_count, 3) + mock_sleep.assert_has_calls([call(1), call(1), call(1)]) + + @patch("agent.model.time.sleep") + def test_countdown_messages_descend(self, mock_sleep: MagicMock) -> None: + cb = MagicMock() + _sleep_with_countdown(seconds=3, attempt=2, max_attempts=5, on_retry=cb) + messages = [c.args[0] for c in cb.call_args_list] + self.assertEqual(len(messages), 3) + self.assertIn("3s", messages[0]) + self.assertIn("2s", messages[1]) + self.assertIn("1s", messages[2]) + # All messages should include attempt info + for msg in messages: + self.assertIn("attempt 2/5", msg) + + @patch("agent.model.time.sleep") + def test_none_callback_still_sleeps(self, mock_sleep: MagicMock) -> None: + _sleep_with_countdown(seconds=2, attempt=1, max_attempts=5, on_retry=None) + self.assertEqual(mock_sleep.call_count, 2) + + +# --------------------------------------------------------------------------- +# _http_stream_sse — 429 retry logic +# --------------------------------------------------------------------------- + + +def _make_successful_response() -> MagicMock: + """Create a mock HTTP response that yields a valid SSE stream.""" + data = 'data: {"choices":[{"delta":{"content":"ok"},"finish_reason":"stop"}]}\n\ndata: [DONE]\n' + resp = MagicMock() + resp.__iter__ = lambda self: iter(data.encode().split(b"\n")) + resp.__enter__ = lambda self: self + resp.__exit__ = lambda self, *a: None + resp.fp = MagicMock() + resp.close = MagicMock() + return resp + + +def _make_429_error(retry_after: str | None = "5") -> urllib.error.HTTPError: + """Create a 429 HTTPError with optional Retry-After header.""" + headers = {} + if retry_after is not None: + headers["Retry-After"] = retry_after + return urllib.error.HTTPError( + url="http://test", + code=429, + msg="Too Many Requests", + hdrs=headers, + fp=io.BytesIO(b'{"type":"error","error":{"type":"rate_limit_error","message":"rate limited"}}'), + ) + + +class HttpStreamSSE429Tests(unittest.TestCase): + """Tests for 429 retry logic in _http_stream_sse.""" + + @patch("agent.model.time.sleep") + def test_429_retries_and_succeeds(self, mock_sleep: MagicMock) -> None: + """First call returns 429, second succeeds.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise _make_429_error("2") + return _make_successful_response() + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + events = _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + max_rate_limit_retries=5, + ) + self.assertEqual(call_count, 2) + self.assertTrue(len(events) > 0) + + @patch("agent.model.time.sleep") + def test_429_exhausts_retries_raises_model_error(self, mock_sleep: MagicMock) -> None: + """All attempts return 429 → ModelError with retry count.""" + + def fake_urlopen(req, timeout=None): + raise _make_429_error("1") + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + with self.assertRaises(ModelError) as ctx: + _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + max_rate_limit_retries=3, + ) + msg = str(ctx.exception) + self.assertIn("429", msg) + self.assertIn("3", msg) # retry count in message + + @patch("agent.model.time.sleep") + def test_429_on_retry_callback_invoked_with_countdown(self, mock_sleep: MagicMock) -> None: + """on_retry callback receives countdown messages during 429 wait.""" + call_count = 0 + retry_messages: list[str] = [] + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise _make_429_error("2") + return _make_successful_response() + + def on_retry_cb(msg: str) -> None: + retry_messages.append(msg) + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + on_retry=on_retry_cb, + max_rate_limit_retries=5, + ) + # Should have countdown messages (2s, 1s) + self.assertEqual(len(retry_messages), 2) + self.assertIn("2s", retry_messages[0]) + self.assertIn("1s", retry_messages[1]) + + def test_non_429_http_error_raises_immediately(self) -> None: + """HTTP 400 errors should raise immediately without retrying.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + raise urllib.error.HTTPError( + url="http://test", + code=400, + msg="Bad Request", + hdrs={}, + fp=io.BytesIO(b'{"error": "bad request"}'), + ) + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + with self.assertRaises(ModelError) as ctx: + _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + max_rate_limit_retries=5, + ) + self.assertIn("HTTP 400", str(ctx.exception)) + self.assertEqual(call_count, 1) + + @patch("agent.model.time.sleep") + def test_connection_retry_and_429_retry_are_independent(self, mock_sleep: MagicMock) -> None: + """Connection retries exhaust independently, then 429 retry kicks in.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + # First 2 calls: connection timeout (uses connection retry budget) + if call_count <= 2: + raise socket.timeout("timed out") + # Third call succeeds on connection but gets 429 + if call_count == 3: + raise _make_429_error("1") + # Fourth call succeeds + return _make_successful_response() + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + events = _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + max_retries=3, + max_rate_limit_retries=5, + ) + self.assertEqual(call_count, 4) + self.assertTrue(len(events) > 0) + + @patch("agent.model.time.sleep") + def test_429_without_retry_after_uses_fallback(self, mock_sleep: MagicMock) -> None: + """429 without Retry-After header falls back to 5-second wait.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise _make_429_error(None) # No Retry-After header + return _make_successful_response() + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + _http_stream_sse( + url="http://test/v1/chat/completions", + method="POST", + headers={}, + payload={"model": "test"}, + max_rate_limit_retries=5, + ) + # Default fallback is 5 seconds → 5 sleep calls + self.assertEqual(mock_sleep.call_count, 5) + + +# --------------------------------------------------------------------------- +# _http_json — 429 retry logic +# --------------------------------------------------------------------------- + + +class HttpJson429Tests(unittest.TestCase): + """Tests for 429 retry logic in _http_json.""" + + @patch("agent.model.time.sleep") + def test_429_retries_silently_and_succeeds(self, mock_sleep: MagicMock) -> None: + """_http_json retries on 429 without any callback.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise _make_429_error("1") + resp = MagicMock() + resp.read.return_value = b'{"data": "ok"}' + resp.__enter__ = lambda self: self + resp.__exit__ = lambda self, *a: None + return resp + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + result = _http_json( + url="http://test/v1/models", + method="GET", + headers={}, + max_rate_limit_retries=5, + ) + self.assertEqual(call_count, 2) + self.assertEqual(result["data"], "ok") + + @patch("agent.model.time.sleep") + def test_429_exhausts_retries_raises_model_error(self, mock_sleep: MagicMock) -> None: + def fake_urlopen(req, timeout=None): + raise _make_429_error("1") + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + with self.assertRaises(ModelError) as ctx: + _http_json( + url="http://test/v1/models", + method="GET", + headers={}, + max_rate_limit_retries=3, + ) + msg = str(ctx.exception) + self.assertIn("429", msg) + self.assertIn("3", msg) + + +if __name__ == "__main__": + unittest.main() From fccc947907e81be4a1a3700ab6af842565de0dd9 Mon Sep 17 00:00:00 2001 From: Kostas Karachalios Date: Fri, 20 Feb 2026 20:14:27 +0100 Subject: [PATCH 2/3] test: add engine integration test for on_retry wiring Verify that 429 retry messages reach the engine's on_event callback end-to-end, and that model.on_retry is cleared after complete() returns. --- tests/test_rate_limit.py | 66 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py index 214ea4be..bd9f7a43 100644 --- a/tests/test_rate_limit.py +++ b/tests/test_rate_limit.py @@ -3,18 +3,25 @@ import io import socket +import tempfile import unittest import urllib.error +from pathlib import Path from unittest.mock import MagicMock, call, patch +from agent.config import AgentConfig +from agent.engine import RLMEngine from agent.model import ( + AnthropicModel, ModelError, + ModelTurn, _http_json, _http_stream_sse, _notify_retry, _parse_retry_after, _sleep_with_countdown, ) +from agent.tools import WorkspaceTools # --------------------------------------------------------------------------- @@ -365,5 +372,64 @@ def fake_urlopen(req, timeout=None): self.assertIn("3", msg) +# --------------------------------------------------------------------------- +# Engine integration — on_retry wiring +# --------------------------------------------------------------------------- + + +class EngineOnRetryWiringTests(unittest.TestCase): + """Verify the engine sets/clears model.on_retry and messages reach on_event.""" + + @patch("agent.model.time.sleep") + def test_engine_on_event_receives_retry_messages(self, mock_sleep: MagicMock) -> None: + """A 429 during model.complete() should surface retry messages via on_event.""" + call_count = 0 + + def fake_urlopen(req, timeout=None): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise _make_429_error("1") + # Second call: return a valid Anthropic SSE response + data = ( + 'event: message_start\ndata: {"type":"message_start","message":{"usage":{"input_tokens":10}}}\n\n' + 'event: content_block_start\ndata: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}\n\n' + 'event: content_block_delta\ndata: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"done"}}\n\n' + 'event: content_block_stop\ndata: {"type":"content_block_stop","index":0}\n\n' + 'event: message_delta\ndata: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}\n\n' + 'event: message_stop\ndata: {"type":"message_stop"}\n\n' + ) + resp = MagicMock() + resp.__iter__ = lambda self: iter(data.encode().split(b"\n")) + resp.__enter__ = lambda self: self + resp.__exit__ = lambda self, *a: None + resp.fp = MagicMock() + resp.close = MagicMock() + return resp + + events_received: list[str] = [] + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + cfg = AgentConfig(workspace=root, max_depth=0, max_steps_per_call=1) + tools = WorkspaceTools(root=root) + model = AnthropicModel(model="test-model", api_key="test-key") + engine = RLMEngine(model=model, tools=tools, config=cfg) + + with patch("agent.model.urllib.request.urlopen", fake_urlopen): + engine.solve( + "test objective", + on_event=lambda msg: events_received.append(msg), + ) + + # The retry message should appear in the event stream + retry_events = [e for e in events_received if "Rate limited" in e] + self.assertTrue(len(retry_events) > 0, f"Expected retry events, got: {events_received}") + self.assertIn("1s", retry_events[0]) + + # on_retry should be cleared after complete() returns + self.assertIsNone(model.on_retry) + + if __name__ == "__main__": unittest.main() From 79aac6bea70c9f37a6c70fe5dbb4f1890e68b0c8 Mon Sep 17 00:00:00 2001 From: Kostas Karachalios Date: Fri, 20 Feb 2026 20:22:57 +0100 Subject: [PATCH 3/3] fix: add depth/step prefix to rate-limit retry messages Retry countdown messages now include the [d0/s6] prefix matching all other engine trace lines, for consistent TUI output. --- agent/engine.py | 4 +++- tests/test_rate_limit.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/agent/engine.py b/agent/engine.py index f04845df..2c209c56 100644 --- a/agent/engine.py +++ b/agent/engine.py @@ -327,7 +327,9 @@ def _solve_recursive( model.on_content_delta = on_content_delta # Rate-limit messages fire at all depths if on_event and hasattr(model, "on_retry"): - model.on_retry = lambda msg: self._emit(msg, on_event) + model.on_retry = lambda msg, _d=depth, _s=step: self._emit( + f"[d{_d}/s{_s}] {msg}", on_event + ) try: turn = model.complete(conversation) except ModelError as exc: diff --git a/tests/test_rate_limit.py b/tests/test_rate_limit.py index bd9f7a43..69d43f7d 100644 --- a/tests/test_rate_limit.py +++ b/tests/test_rate_limit.py @@ -422,10 +422,11 @@ def fake_urlopen(req, timeout=None): on_event=lambda msg: events_received.append(msg), ) - # The retry message should appear in the event stream + # The retry message should appear in the event stream with depth/step prefix retry_events = [e for e in events_received if "Rate limited" in e] self.assertTrue(len(retry_events) > 0, f"Expected retry events, got: {events_received}") self.assertIn("1s", retry_events[0]) + self.assertRegex(retry_events[0], r"\[d\d+/s\d+\]") # on_retry should be cleared after complete() returns self.assertIsNone(model.on_retry)