diff --git a/src/agent/worker.rs b/src/agent/worker.rs
index acf4af1de..1f248ab79 100644
--- a/src/agent/worker.rs
+++ b/src/agent/worker.rs
@@ -5,21 +5,41 @@ use crate::config::BrowserConfig;
 use crate::error::Result;
 use crate::hooks::{SpacebotHook, ToolNudgePolicy};
 use crate::llm::SpacebotModel;
-use crate::llm::routing::is_context_overflow_error;
+use crate::llm::routing::{is_context_overflow_error, is_retriable_error};
 use crate::{AgentDeps, ChannelId, ProcessId, ProcessType, WorkerId};
 use rig::agent::AgentBuilder;
 use rig::completion::CompletionModel;
+use std::collections::HashMap;
 use std::fmt::Write as _;
 use std::path::PathBuf;
 use tokio::sync::{mpsc, watch};
 use uuid::Uuid;
 
 /// How many turns per segment before we check context and potentially compact.
-const TURNS_PER_SEGMENT: usize = 25;
+///
+/// Kept relatively low so compaction checks run frequently. Fast models can
+/// burn through many tool-call turns quickly, and each turn may add large
+/// tool results (browser snapshots, shell output). Checking every 15 turns
+/// instead of 25 reduces the chance of blowing past the context window
+/// within a single segment.
+const TURNS_PER_SEGMENT: usize = 15;
 
 /// Max consecutive context overflow recoveries before giving up.
-/// Prevents infinite compact-retry loops if something is fundamentally wrong.
-const MAX_OVERFLOW_RETRIES: usize = 3;
+/// Each retry dedup-strips stale tool results and force-compacts 75% of
+/// remaining messages. Two retries is enough to handle the edge case where
+/// a single message is enormous — beyond that, something is fundamentally
+/// broken (system prompt alone exceeds the context window, or the compaction
+/// floor of 4 messages is still too large).
+const MAX_OVERFLOW_RETRIES: usize = 2;
+
+/// Max consecutive transient provider error retries before giving up.
+/// Transient errors (upstream 500s, timeouts, rate limits that survived
+/// model-level retries) get a backoff-and-retry at the worker level so
+/// the worker survives temporary provider outages.
+const MAX_TRANSIENT_RETRIES: usize = 5;
+
+/// Base delay for worker-level transient error backoff (doubles each retry).
+const TRANSIENT_RETRY_BASE_DELAY: std::time::Duration = std::time::Duration::from_secs(5);
 
 /// Max segments before the worker gives up and returns a partial result.
 /// Prevents unbounded worker loops when the LLM keeps hitting max_turns
@@ -302,6 +322,7 @@ impl Worker {
         let mut prompt = self.task.clone();
         let mut segments_run = 0;
         let mut overflow_retries = 0;
+        let mut transient_retries = 0;
 
         let mut result = if resuming {
             // For resumed workers, synthesize a "result" from the task
@@ -311,6 +332,17 @@ impl Worker {
             loop {
                 segments_run += 1;
 
+                // Pre-prompt maintenance: dedup stale tool results and check
+                // context usage *before* each LLM call, not just at segment
+                // boundaries. Fast models can accumulate large tool results
+                // within a single segment and exceed the context window before
+                // we ever reach a checkpoint.
+                if segments_run > 1 {
+                    dedup_tool_results(&mut history);
+                    self.maybe_compact_history(&mut compacted_history, &mut history)
+                        .await;
+                }
+
                 match self
                     .hook
                     .prompt_with_tool_nudge_retry(&agent, &mut history, &prompt)
@@ -321,6 +353,7 @@ impl Worker {
                     }
                     Err(rig::completion::PromptError::MaxTurnsError { .. }) => {
                         overflow_retries = 0;
+                        transient_retries = 0;
 
                         if segments_run >= MAX_SEGMENTS {
                             tracing::warn!(
@@ -337,6 +370,7 @@ impl Worker {
                         }
 
                         self.persist_transcript(&compacted_history, &history).await;
+                        dedup_tool_results(&mut history);
                         self.maybe_compact_history(&mut compacted_history, &mut history)
                             .await;
                         prompt =
@@ -377,6 +411,7 @@ impl Worker {
                             "context overflow, compacting and retrying"
                         );
                         self.hook.send_status("compacting (overflow recovery)");
+                        dedup_tool_results(&mut history);
                         self.force_compact_history(&mut compacted_history, &mut history)
                             .await;
                         prompt = "Continue where you left off. Do not repeat completed work. \
@@ -384,6 +419,43 @@ impl Worker {
                               has been compacted."
                             .into();
                     }
+                    Err(error) if is_retriable_error(&error.to_string()) => {
+                        transient_retries += 1;
+                        if transient_retries > MAX_TRANSIENT_RETRIES {
+                            self.state = WorkerState::Failed;
+                            self.hook.send_status("failed");
+                            self.write_failure_log(&history, &format!(
+                                "transient provider error after {MAX_TRANSIENT_RETRIES} retries: {error}"
+                            ));
+                            self.persist_transcript(&compacted_history, &history).await;
+                            tracing::error!(
+                                worker_id = %self.id,
+                                retries = MAX_TRANSIENT_RETRIES,
+                                %error,
+                                "worker transient error retries exhausted"
+                            );
+                            return Err(crate::error::AgentError::Other(error.into()).into());
+                        }
+
+                        let delay =
+                            TRANSIENT_RETRY_BASE_DELAY * 2u32.pow((transient_retries - 1) as u32);
+                        tracing::warn!(
+                            worker_id = %self.id,
+                            attempt = transient_retries,
+                            delay_secs = delay.as_secs(),
+                            %error,
+                            "transient provider error, backing off and retrying"
+                        );
+                        self.hook.send_status(format!(
+                            "provider error, retrying in {}s ({transient_retries}/{MAX_TRANSIENT_RETRIES})",
+                            delay.as_secs()
+                        ));
+                        tokio::time::sleep(delay).await;
+
+                        // Don't change the prompt — just retry with the same
+                        // state. The LLM never saw this request so there's
+                        // nothing to "continue" from.
+                    }
                     Err(error) => {
                         self.state = WorkerState::Failed;
                         self.hook.send_status("failed");
@@ -440,12 +512,14 @@ impl Worker {
                 self.state = WorkerState::Running;
                 self.hook.send_status("processing follow-up");
 
-                // Compact before follow-up if needed
+                // Dedup stale tool results and compact before follow-up if needed
+                dedup_tool_results(&mut history);
                 self.maybe_compact_history(&mut compacted_history, &mut history)
                     .await;
 
                 let mut follow_up_prompt = follow_up.clone();
                 let mut follow_up_overflow_retries = 0;
+                let mut follow_up_transient_retries = 0u32;
                 let follow_up_hook = self
                     .hook
                     .clone()
@@ -474,12 +548,38 @@ impl Worker {
                                 "follow-up context overflow, compacting and retrying"
                             );
                             self.hook.send_status("compacting (overflow recovery)");
+                            dedup_tool_results(&mut history);
                             self.force_compact_history(&mut compacted_history, &mut history)
                                 .await;
                             let prompt_engine = self.deps.runtime_config.prompts.load();
                             let overflow_msg = prompt_engine.render_system_worker_overflow()?;
                             follow_up_prompt = format!("{follow_up}\n\n{overflow_msg}");
                         }
+                        Err(error) if is_retriable_error(&error.to_string()) => {
+                            follow_up_transient_retries += 1;
+                            if follow_up_transient_retries > MAX_TRANSIENT_RETRIES as u32 {
+                                let failure_reason = format!(
+                                    "follow-up transient error after {MAX_TRANSIENT_RETRIES} retries: {error}"
+                                );
+                                self.write_failure_log(&history, &failure_reason);
+                                tracing::error!(worker_id = %self.id, %error, "follow-up transient retries exhausted");
+                                break Err(failure_reason);
+                            }
+                            let delay = TRANSIENT_RETRY_BASE_DELAY
+                                * 2u32.pow(follow_up_transient_retries - 1);
+                            tracing::warn!(
+                                worker_id = %self.id,
+                                attempt = follow_up_transient_retries,
+                                delay_secs = delay.as_secs(),
+                                %error,
+                                "follow-up transient error, backing off and retrying"
+                            );
+                            self.hook.send_status(format!(
+                                "provider error, retrying in {}s ({follow_up_transient_retries}/{MAX_TRANSIENT_RETRIES})",
+                                delay.as_secs()
+                            ));
+                            tokio::time::sleep(delay).await;
+                        }
                         Err(error) => {
                             let failure_reason = format!("follow-up failed: {error}");
                             self.write_failure_log(&history, &failure_reason);
@@ -865,6 +965,98 @@ impl Worker {
     }
 }
 
+/// Tool names whose results are bulky and superseded by the latest call.
+/// Only the most recent result for each tool is kept in full; older results
+/// are replaced with a short marker to save context space.
+///
+/// This runs in-place on the history before every LLM call, so the model
+/// always has the latest snapshot but doesn't waste context on stale ones.
+const DEDUP_TOOL_RESULTS: &[&str] = &["browser_snapshot", "browser_tab_list"];
+
+/// Replace all but the most recent result for each tool in `DEDUP_TOOL_RESULTS`
+/// with a short placeholder. This dramatically reduces context usage for
+/// browser-heavy workflows where `browser_snapshot` returns large ARIA trees
+/// on every call.
+///
+/// Note: this mutates `history` in-place, so superseded results are also
+/// replaced in the persisted transcript.
+fn dedup_tool_results(history: &mut [rig::message::Message]) {
+    // Step 1: Build a map from tool-call ID → tool name for dedup-eligible tools.
+    // We need this because ToolResult only has call_id, not the tool name.
+    let mut call_id_to_tool: HashMap<String, String> = HashMap::new();
+    for message in history.iter() {
+        if let rig::message::Message::Assistant { content, .. } = message {
+            for item in content.iter() {
+                if let rig::message::AssistantContent::ToolCall(tc) = item
+                    && DEDUP_TOOL_RESULTS.contains(&tc.function.name.as_str())
+                {
+                    // Rig uses call_id when present, falls back to id.
+                    let effective_id = tc.call_id.as_ref().unwrap_or(&tc.id);
+                    call_id_to_tool.insert(effective_id.clone(), tc.function.name.clone());
+                }
+            }
+        }
+    }
+
+    if call_id_to_tool.is_empty() {
+        return;
+    }
+
+    // Step 2: Find the last (most recent) result position for each tool name.
+    // Tracked as (message_index, item_index) since Rig can pack multiple
+    // ToolResult entries into a single User message.
+    let mut last_result_position: HashMap<&str, (usize, usize)> = HashMap::new();
+    for (message_index, message) in history.iter().enumerate() {
+        if let rig::message::Message::User { content } = message {
+            for (item_index, item) in content.iter().enumerate() {
+                if let rig::message::UserContent::ToolResult(tr) = item
+                    && let Some(call_id) = &tr.call_id
+                    && let Some(tool_name) = call_id_to_tool.get(call_id)
+                {
+                    last_result_position.insert(
+                        // Safe: tool_name came from DEDUP_TOOL_RESULTS which is 'static
+                        DEDUP_TOOL_RESULTS
+                            .iter()
+                            .find(|&&name| name == tool_name)
+                            .expect("tool name came from DEDUP_TOOL_RESULTS"),
+                        (message_index, item_index),
+                    );
+                }
+            }
+        }
+    }
+
+    // Step 3: Replace older results with a compact marker.
+    let mut replaced = 0usize;
+    for (message_index, message) in history.iter_mut().enumerate() {
+        if let rig::message::Message::User { content } = message {
+            for (item_index, item) in content.iter_mut().enumerate() {
+                if let rig::message::UserContent::ToolResult(tr) = item
+                    && let Some(call_id) = &tr.call_id
+                    && let Some(tool_name) = call_id_to_tool.get(call_id)
+                {
+                    let is_last = last_result_position
+                        .get(tool_name.as_str())
+                        .is_some_and(|&last| last == (message_index, item_index));
+
+                    if !is_last {
+                        tr.content =
+                            rig::OneOrMany::one(rig::message::ToolResultContent::text(format!(
+                                "[{tool_name} output superseded by a more recent call — \
+                                 see latest {tool_name} result below]"
+                            )));
+                        replaced += 1;
+                    }
+                }
+            }
+        }
+    }
+
+    if replaced > 0 {
+        tracing::debug!(replaced, "deduped stale tool results in history");
+    }
+}
+
 /// Build a recap of removed worker history for the compaction marker.
 ///
 /// Extracts tool calls, assistant text, and tool results so the worker
diff --git a/src/hooks/loop_guard.rs b/src/hooks/loop_guard.rs
index 7d37e4294..984e62ad7 100644
--- a/src/hooks/loop_guard.rs
+++ b/src/hooks/loop_guard.rs
@@ -8,6 +8,14 @@ use std::collections::{HashMap, HashSet, VecDeque};
 const POLL_TOOLS: &[&str] = &["shell"];
 const HISTORY_CAPACITY: usize = 30;
 
+/// Tools that read changing state and are expected to be called repeatedly with
+/// identical arguments across a session. `browser_snapshot` always takes `{}`
+/// but returns different content after each page mutation. These tools get the
+/// poll multiplier on their consecutive-call threshold and are excluded from
+/// ping-pong detection patterns (since alternating snapshot→click is the normal
+/// browser workflow, not a stuck loop).
+const OBSERVATION_TOOLS: &[&str] = &["browser_snapshot", "browser_tab_list"];
+
 /// Avoids hashing multi-MB tool outputs while still catching identical short
 /// results. Large outputs that differ only in the tail (growing log files,
 /// etc.) won't hash-match, which is correct — the tool is returning new data.
@@ -79,7 +87,15 @@ pub enum LoopGuardVerdict {
 /// within one Rig `agent.prompt()` invocation, reset at prompt boundaries.
 pub struct LoopGuard {
     config: LoopGuardConfig,
+    /// Consecutive call count per `(tool, args)` hash. Reset to 0 for all
+    /// hashes except the current one whenever a *different* call is made.
+    /// This means `browser_snapshot` (empty args) can be called 50 times
+    /// across a session as long as other tools run in between — but 7 calls
+    /// in a row with nothing else is a real loop.
     call_counts: HashMap<String, u32>,
+    /// The call hash of the most recent `check()`. Used to detect whether the
+    /// current call is a continuation of the same repeated sequence.
+    last_call_hash: Option<String>,
     total_calls: u32,
     outcome_counts: HashMap<String, u32>,
     // Call hashes poisoned by outcome detection — next check() auto-blocks.
@@ -94,6 +110,7 @@ impl LoopGuard {
         Self {
             config,
             call_counts: HashMap::new(),
+            last_call_hash: None,
             total_calls: 0,
             outcome_counts: HashMap::new(),
             blocked_outcomes: HashSet::new(),
@@ -105,6 +122,7 @@ impl LoopGuard {
 
     pub fn reset(&mut self) {
         self.call_counts.clear();
+        self.last_call_hash = None;
         self.total_calls = 0;
         self.outcome_counts.clear();
         self.blocked_outcomes.clear();
@@ -142,6 +160,23 @@ impl LoopGuard {
             ));
         }
 
+        // Reset consecutive counts when the call hash changes. A different
+        // tool call in between means the model is doing real work, not looping.
+        // This prevents observation tools like `browser_snapshot` (which always
+        // have the same empty args) from being permanently blocked after N
+        // total uses across a long session.
+        let is_same_as_last = self.last_call_hash.as_ref() == Some(&call_hash);
+        if !is_same_as_last {
+            self.call_counts.clear();
+            // Reset per-hash warnings so the model gets fresh warnings if it
+            // starts a new loop with this tool later. Retain ping-pong warning
+            // counters so the max_warnings_per_call escalation still works for
+            // alternating patterns detected across tool switches.
+            self.warnings_emitted
+                .retain(|key, _| key.starts_with("pingpong_"));
+        }
+        self.last_call_hash = Some(call_hash.clone());
+
         let count = self.call_counts.entry(call_hash.clone()).or_insert(0);
         *count += 1;
         let count_value = *count;
@@ -240,6 +275,14 @@ impl LoopGuard {
             if a != b && tail[2] == a && tail[3] == b && tail[4] == a && tail[5] == b {
                 let tool_a = self.resolve_tool_name(a);
                 let tool_b = self.resolve_tool_name(b);
+
+                // Observation tools alternate with action tools as part of
+                // normal workflow (snapshot→click→snapshot→click). Don't flag
+                // these patterns as ping-pong.
+                if Self::involves_observation_tool(&tool_a, &tool_b) {
+                    return None;
+                }
+
                 return Some(format!(
                     "Ping-pong detected: tools '{}' and '{}' are alternating \
                      repeatedly. Break the cycle by trying a different approach.",
@@ -266,6 +309,14 @@ impl LoopGuard {
                 let tool_a = self.resolve_tool_name(a);
                 let tool_b = self.resolve_tool_name(b);
                 let tool_c = self.resolve_tool_name(c);
+
+                if Self::involves_observation_tool(&tool_a, &tool_b)
+                    || Self::involves_observation_tool(&tool_a, &tool_c)
+                    || Self::involves_observation_tool(&tool_b, &tool_c)
+                {
+                    return None;
+                }
+
                 return Some(format!(
                     "Ping-pong detected: tools '{}', '{}', '{}' are cycling \
                      repeatedly. Break the cycle by trying a different approach.",
@@ -277,6 +328,15 @@ impl LoopGuard {
         None
     }
 
+    /// Returns true if exactly one tool is an observation tool. Observation
+    /// tools (like `browser_snapshot`) naturally alternate with action tools
+    /// as part of normal workflow (snapshot→click) and should not be flagged
+    /// as ping-pong. Two observation tools alternating (snapshot↔tab_list)
+    /// is still suspicious and should be caught.
+    fn involves_observation_tool(tool_a: &str, tool_b: &str) -> bool {
+        OBSERVATION_TOOLS.contains(&tool_a) ^ OBSERVATION_TOOLS.contains(&tool_b)
+    }
+
     fn count_ping_pong_repeats(&self) -> u32 {
         let history: Vec<&String> = self.recent_calls.iter().collect();
         let length = history.len();
@@ -332,9 +392,16 @@ impl LoopGuard {
             .unwrap_or_else(|| "unknown".to_string())
     }
 
-    // Poll tools get relaxed thresholds because they're expected to be
-    // called repeatedly (checking build output, watching deployment status).
+    // Poll/observation tools get relaxed thresholds because they're expected
+    // to be called repeatedly (checking build output, watching deployment
+    // status, re-snapshotting browser state after page mutations).
     fn is_poll_call(tool_name: &str, args: &str) -> bool {
+        // Observation tools are always considered poll calls — they read
+        // changing state with identical args every time.
+        if OBSERVATION_TOOLS.contains(&tool_name) {
+            return true;
+        }
+
         if POLL_TOOLS.contains(&tool_name) {
             let args_lower = args.to_lowercase();
             if args.len() < 200
@@ -614,6 +681,70 @@ mod tests {
         assert_eq!(guard.total_calls, 1);
     }
 
+    #[test]
+    fn non_consecutive_identical_calls_allowed() {
+        // This is the browser_snapshot bug fix: calling the same parameterless
+        // tool many times across a session should be fine as long as other
+        // tools run in between (the page state changes between snapshots).
+        let mut guard = LoopGuard::new(worker_config());
+        let snapshot_args = "{}";
+        let click_args = r#"{"index": 3}"#;
+
+        // Simulate 20 rounds of snapshot → click → snapshot → click...
+        for _ in 0..20 {
+            let verdict = guard.check("browser_snapshot", snapshot_args);
+            assert_eq!(
+                verdict,
+                LoopGuardVerdict::Allow,
+                "browser_snapshot should be allowed when interleaved with other tools"
+            );
+            let verdict = guard.check("browser_click", click_args);
+            assert_eq!(verdict, LoopGuardVerdict::Allow);
+        }
+    }
+
+    #[test]
+    fn consecutive_identical_calls_still_blocked() {
+        // Calling the same tool with the same args many times IN A ROW should
+        // still trigger the block — that's a real loop.
+        let mut guard = LoopGuard::new(worker_config());
+        let args = r#"{"query":"same thing"}"#;
+
+        // Worker warn_threshold = 4. Calls 1-3 are Allow, call 4 hits warn.
+        for _ in 0..3 {
+            assert_eq!(guard.check("web_search", args), LoopGuardVerdict::Allow);
+        }
+        // Call 4 hits warn threshold.
+        let verdict = guard.check("web_search", args);
+        assert!(matches!(verdict, LoopGuardVerdict::Block(_)));
+    }
+
+    #[test]
+    fn observation_tool_consecutive_gets_relaxed_threshold() {
+        // Observation tools like browser_snapshot get the poll multiplier even
+        // when called consecutively, so they can handle legitimate sequences
+        // of multiple snapshots.
+        let mut guard = LoopGuard::new(worker_config());
+        let args = "{}";
+
+        // Worker warn_threshold=4, poll_multiplier=3, so effective warn=12.
+        // Calls 1-11 should all be allowed.
+        for i in 1..=11 {
+            let verdict = guard.check("browser_snapshot", args);
+            assert_eq!(
+                verdict,
+                LoopGuardVerdict::Allow,
+                "Call {i} should be allowed for observation tool"
+            );
+        }
+        // Call 12 should trigger a warning.
+        let verdict = guard.check("browser_snapshot", args);
+        assert!(
+            matches!(verdict, LoopGuardVerdict::Block(ref message) if message.contains("Warning")),
+            "Expected warning at relaxed threshold, got: {verdict:?}"
+        );
+    }
+
     #[test]
     fn warning_bucket_escalates_to_block() {
         let config = LoopGuardConfig {
diff --git a/src/llm/routing.rs b/src/llm/routing.rs
index 0c3b6744c..d6a435dec 100644
--- a/src/llm/routing.rs
+++ b/src/llm/routing.rs
@@ -120,6 +120,7 @@ pub fn is_retriable_error(error_message: &str) -> bool {
     let lower = error_message.to_lowercase();
     // Rate limits and server errors
     lower.contains("429")
+        || lower.contains("500")
         || lower.contains("502")
         || lower.contains("503")
         || lower.contains("504")
@@ -127,6 +128,11 @@ pub fn is_retriable_error(error_message: &str) -> bool {
         || lower.contains("overloaded")
         || lower.contains("timeout")
         || lower.contains("connection")
+        // Generic server errors (OpenRouter wraps upstream 500s in various
+        // phrasings like "The server had an error while processing your request")
+        || lower.contains("server error")
+        || lower.contains("server had an error")
+        || lower.contains("internal error")
         // Empty/malformed responses are transient provider issues
         || lower.contains("empty response")
         || lower.contains("failed to read response body")
diff --git a/src/tools/browser.rs b/src/tools/browser.rs
index fb231e09e..d0590846c 100644
--- a/src/tools/browser.rs
+++ b/src/tools/browser.rs
@@ -2235,7 +2235,8 @@ async fn get_element_center(
         .map_err(|error| {
             BrowserError::new(format!(
                 "failed to get box model for element: {error}. \
-                 The element may not be visible — try scrolling or taking a screenshot."
+                 The element may not be visible or may have been removed from the DOM. \
+                 Run browser_snapshot to get the current interactable elements and their indices."
             ))
         })?;