From 80857a2370cc0a6b3568fd0bc04f593cb2a15f0a Mon Sep 17 00:00:00 2001 From: James Pine Date: Tue, 3 Mar 2026 19:10:17 -0800 Subject: [PATCH 1/2] feat: persistent browser sessions with configurable close policy Add persist_session and close_policy to BrowserConfig, allowing browser instances to survive across worker lifetimes. When persist_session is enabled, workers share a single browser handle via Arc> and reconnect to existing tabs on launch instead of starting fresh. ClosePolicy controls what happens when a worker finishes: - close_browser (default): full teardown, same as before - close_tabs: close all pages but keep browser process alive - detach: leave browser and tabs running for the next worker Backend: SharedBrowserHandle type, BrowserTool::new_shared() constructor, reconnect_existing_tabs() discovery, close policy dispatch in handle_close(), RuntimeConfig.shared_browser field, API types and TOML support. Frontend: persist session toggle and close policy dropdown in AgentConfig. Docs: config.mdx reference table and browser.mdx persistent sessions guide. --- docs/content/docs/(configuration)/config.mdx | 8 +- docs/content/docs/(features)/browser.mdx | 76 +++++-- interface/src/api/client.ts | 4 + interface/src/routes/AgentConfig.tsx | 23 ++ src/api/agents.rs | 1 + src/api/config.rs | 12 + src/config/load.rs | 38 +++- src/config/runtime.rs | 22 ++ src/config/toml_schema.rs | 2 + src/config/types.rs | 37 ++++ src/main.rs | 1 + src/tools.rs | 17 +- src/tools/browser.rs | 222 ++++++++++++++++--- 13 files changed, 395 insertions(+), 68 deletions(-) diff --git a/docs/content/docs/(configuration)/config.mdx b/docs/content/docs/(configuration)/config.mdx index 7ad8f5834..d239814a9 100644 --- a/docs/content/docs/(configuration)/config.mdx +++ b/docs/content/docs/(configuration)/config.mdx @@ -106,8 +106,10 @@ startup_delay_secs = 5 enabled = true headless = true evaluate_enabled = false -executable_path = "/path/to/chrome" # optional, auto-detected -screenshot_dir = "/path/to/screenshots" # optional, defaults to data_dir/screenshots +persist_session = false # keep browser alive across worker lifetimes +close_policy = "close_browser" # "close_browser", "close_tabs", or "detach" +executable_path = "/path/to/chrome" # optional, auto-detected +screenshot_dir = "/path/to/screenshots" # optional, defaults to data_dir/screenshots # --- Agents --- # At least one agent is required. First agent or the one with default = true @@ -532,6 +534,8 @@ When branch/worker/cron dispatch happens before readiness is satisfied, Spacebot | `enabled` | bool | true | Whether workers have browser tools | | `headless` | bool | true | Run Chrome headless | | `evaluate_enabled` | bool | false | Allow JavaScript evaluation | +| `persist_session` | bool | false | Keep browser alive across worker lifetimes. Tabs, cookies, and logins survive between tasks. Requires agent restart to take effect. | +| `close_policy` | string | `"close_browser"` | What happens on close: `"close_browser"` (kill Chrome), `"close_tabs"` (close tabs, keep browser), `"detach"` (disconnect, leave everything) | | `executable_path` | string | None | Custom Chrome/Chromium path | | `screenshot_dir` | string | None | Directory for screenshots | diff --git a/docs/content/docs/(features)/browser.mdx b/docs/content/docs/(features)/browser.mdx index 219bdde9f..b7a66fa02 100644 --- a/docs/content/docs/(features)/browser.mdx +++ b/docs/content/docs/(features)/browser.mdx @@ -42,8 +42,8 @@ Single `browser` tool with an `action` discriminator. All actions share one argu | Action | Required Args | Description | |--------|--------------|-------------| -| `launch` | -- | Start Chrome. Must be called first. | -| `close` | -- | Shut down Chrome and clean up all state. | +| `launch` | -- | Start Chrome (or reconnect to a persistent browser). Must be called first. | +| `close` | -- | Shut down, close tabs, or detach depending on `close_policy`. | ### Navigation @@ -130,6 +130,8 @@ Browser config lives in `config.toml` under `[defaults.browser]` (or per-agent o enabled = true # include browser tool in worker ToolServers headless = true # run Chrome without a visible window evaluate_enabled = false # allow JavaScript evaluation via the tool +persist_session = false # keep browser alive across worker lifetimes +close_policy = "close_browser" # what happens on close executable_path = "" # custom Chrome binary path (auto-detected if empty) screenshot_dir = "" # override screenshot storage location ``` @@ -143,34 +145,68 @@ id = "web-scraper" [agents.browser] evaluate_enabled = true # this agent's workers can run JS headless = false # show the browser window for debugging +persist_session = true # keep tabs and logins between worker runs +close_policy = "detach" # workers disconnect without closing tabs ``` When `enabled = false`, the browser tool is not registered on worker ToolServers. Workers for that agent won't see it in their available tools. +### Close Policy + +Controls what happens when a worker calls `close` or finishes: + +| Policy | Behavior | +|--------|----------| +| `close_browser` | Kill Chrome and reset all state. Default. | +| `close_tabs` | Close all tracked tabs but keep Chrome running. | +| `detach` | Disconnect without touching tabs or the browser process. | + +`close_policy` is most useful with `persist_session = true`. With `detach`, workers leave the browser exactly as they found it. + +## Persistent Sessions + +By default, each worker launches its own Chrome process. When the worker finishes, the browser dies and all session state (cookies, tabs, logins) is lost. + +With `persist_session = true`, all workers for an agent share a single browser instance via a `SharedBrowserHandle` held in `RuntimeConfig`. The browser and its tabs survive across worker lifetimes. + +When a new worker calls `launch` on a persistent browser that's already running, it reconnects to the existing process, discovers all open tabs, and can continue where the previous worker left off. Combined with `close_policy = "detach"`, workers leave the browser untouched when they finish. + +This is useful for: + +- **Login persistence** -- log in once, reuse the session across multiple worker runs. +- **Watching the agent work** -- set `headless = false` to see a visible Chrome window that stays open. +- **Multi-step workflows** -- a worker can leave tabs open for a follow-up worker to pick up. + +```toml +[defaults.browser] +headless = false # visible Chrome window +persist_session = true # keep alive across workers +close_policy = "detach" # workers just disconnect +``` + +Changing `persist_session` requires an agent restart. + ## Architecture ``` -Worker (Rig Agent) - │ - ├── shell, file, exec, set_status (standard worker tools) - │ - └── browser (BrowserTool) - │ - ├── Arc> (shared across tool invocations) - │ ├── Browser (chromiumoxide handle) - │ ├── pages: HashMap (target_id → Page) - │ ├── active_target (current tab) - │ └── element_refs (snapshot ref → ElementRef) - │ - └── Config - ├── headless - ├── evaluate_enabled - └── screenshot_dir +# Default mode (persist_session = false): +Worker A → BrowserTool { own BrowserState } → own Chrome process +Worker B → BrowserTool { own BrowserState } → own Chrome process + +# Persistent mode (persist_session = true): +RuntimeConfig → SharedBrowserHandle (Arc>) +Worker A → BrowserTool { shared state } ──┐ +Worker B → BrowserTool { shared state } ──┤→ single Chrome process +Worker C → BrowserTool { shared state } ──┘ ``` -Each worker gets its own `BrowserTool` instance with its own `BrowserState`. The state is behind `Arc>` because the Rig tool trait requires `Clone`. The Chrome process (and its CDP WebSocket handler task) live for the lifetime of the worker. +In both modes, `BrowserState` holds: +- `Browser` -- chromiumoxide handle +- `pages: HashMap` -- target_id to Page +- `active_target` -- current tab +- `element_refs` -- snapshot ref to ElementRef -The CDP handler runs as a background tokio task that polls the WebSocket stream. It's spawned during `launch` and dropped when the browser closes or the worker completes. +The state is behind `Arc>` because the Rig tool trait requires `Clone`. The CDP handler runs as a background tokio task that polls the WebSocket stream, spawned during `launch`. ## Implementation diff --git a/interface/src/api/client.ts b/interface/src/api/client.ts index 482d056d1..8e8e9f98c 100644 --- a/interface/src/api/client.ts +++ b/interface/src/api/client.ts @@ -564,6 +564,8 @@ export interface BrowserSection { enabled: boolean; headless: boolean; evaluate_enabled: boolean; + persist_session: boolean; + close_policy: "close_browser" | "close_tabs" | "detach"; } export interface SandboxSection { @@ -646,6 +648,8 @@ export interface BrowserUpdate { enabled?: boolean; headless?: boolean; evaluate_enabled?: boolean; + persist_session?: boolean; + close_policy?: "close_browser" | "close_tabs" | "detach"; } export interface SandboxUpdate { diff --git a/interface/src/routes/AgentConfig.tsx b/interface/src/routes/AgentConfig.tsx index 5924be3d9..d9a2ecded 100644 --- a/interface/src/routes/AgentConfig.tsx +++ b/interface/src/routes/AgentConfig.tsx @@ -820,6 +820,29 @@ function ConfigSectionEditor({ sectionId, label, description, detail, config, on value={localValues.evaluate_enabled as boolean} onChange={(v) => handleChange("evaluate_enabled", v)} /> + handleChange("persist_session", v)} + /> +
+ +

What happens when a worker calls "close" or finishes.

+ +
); case "sandbox": diff --git a/src/api/agents.rs b/src/api/agents.rs index 81999c552..34e6d2b82 100644 --- a/src/api/agents.rs +++ b/src/api/agents.rs @@ -784,6 +784,7 @@ pub(super) async fn create_agent( brave_search_key, runtime_config.workspace_dir.clone(), sandbox.clone(), + runtime_config.shared_browser.clone(), ); let cortex_store = crate::agent::cortex_chat::CortexChatStore::new(db.sqlite.clone()); let cortex_session = crate::agent::cortex_chat::CortexChatSession::new( diff --git a/src/api/config.rs b/src/api/config.rs index 6653fe250..644095195 100644 --- a/src/api/config.rs +++ b/src/api/config.rs @@ -73,6 +73,8 @@ pub(super) struct BrowserSection { enabled: bool, headless: bool, evaluate_enabled: bool, + persist_session: bool, + close_policy: String, } #[derive(Serialize, Debug)] @@ -199,6 +201,8 @@ pub(super) struct BrowserUpdate { enabled: Option, headless: Option, evaluate_enabled: Option, + persist_session: Option, + close_policy: Option, } #[derive(Deserialize, Debug)] @@ -286,6 +290,8 @@ pub(super) async fn get_agent_config( enabled: browser.enabled, headless: browser.headless, evaluate_enabled: browser.evaluate_enabled, + persist_session: browser.persist_session, + close_policy: browser.close_policy.as_str().to_string(), }, sandbox: SandboxSection { mode: match sandbox.mode { @@ -672,6 +678,12 @@ fn update_browser_table( if let Some(v) = browser.evaluate_enabled { table["evaluate_enabled"] = toml_edit::value(v); } + if let Some(v) = browser.persist_session { + table["persist_session"] = toml_edit::value(v); + } + if let Some(ref v) = browser.close_policy { + table["close_policy"] = toml_edit::value(v.as_str()); + } Ok(()) } diff --git a/src/config/load.rs b/src/config/load.rs index 4abd1c0b0..e5f0a2d99 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -10,13 +10,14 @@ use super::providers::{ }; use super::toml_schema::*; use super::{ - AgentConfig, ApiConfig, ApiType, Binding, BrowserConfig, CoalesceConfig, CompactionConfig, - Config, CortexConfig, CronDef, DefaultsConfig, DiscordConfig, DiscordInstanceConfig, - EmailConfig, EmailInstanceConfig, GroupDef, HumanDef, IngestionConfig, LinkDef, LlmConfig, - McpServerConfig, McpTransport, MemoryPersistenceConfig, MessagingConfig, MetricsConfig, - OpenCodeConfig, ProviderConfig, SlackCommandConfig, SlackConfig, SlackInstanceConfig, - TelegramConfig, TelegramInstanceConfig, TelemetryConfig, TwitchConfig, TwitchInstanceConfig, - WarmupConfig, WebhookConfig, normalize_adapter, validate_named_messaging_adapters, + AgentConfig, ApiConfig, ApiType, Binding, BrowserConfig, ClosePolicy, CoalesceConfig, + CompactionConfig, Config, CortexConfig, CronDef, DefaultsConfig, DiscordConfig, + DiscordInstanceConfig, EmailConfig, EmailInstanceConfig, GroupDef, HumanDef, IngestionConfig, + LinkDef, LlmConfig, McpServerConfig, McpTransport, MemoryPersistenceConfig, MessagingConfig, + MetricsConfig, OpenCodeConfig, ProviderConfig, SlackCommandConfig, SlackConfig, + SlackInstanceConfig, TelegramConfig, TelegramInstanceConfig, TelemetryConfig, TwitchConfig, + TwitchInstanceConfig, WarmupConfig, WebhookConfig, normalize_adapter, + validate_named_messaging_adapters, }; use crate::error::{ConfigError, Result}; @@ -110,6 +111,21 @@ pub(super) fn warn_unknown_config_keys(content: &str) { } } +fn parse_close_policy(value: Option<&str>) -> Option { + match value? { + "close_browser" => Some(ClosePolicy::CloseBrowser), + "close_tabs" => Some(ClosePolicy::CloseTabs), + "detach" => Some(ClosePolicy::Detach), + other => { + tracing::warn!( + value = other, + "unknown close_policy value, expected one of: close_browser, close_tabs, detach" + ); + None + } + } +} + fn parse_otlp_headers(value: Option) -> Result> { let Some(raw) = value else { return Ok(HashMap::new()); @@ -1393,6 +1409,9 @@ impl Config { .screenshot_dir .map(PathBuf::from) .or_else(|| base.screenshot_dir.clone()), + persist_session: b.persist_session.unwrap_or(base.persist_session), + close_policy: parse_close_policy(b.close_policy.as_deref()) + .unwrap_or(base.close_policy), chrome_cache_dir: chrome_cache_dir.clone(), } }) @@ -1590,6 +1609,11 @@ impl Config { .screenshot_dir .map(PathBuf::from) .or_else(|| defaults.browser.screenshot_dir.clone()), + persist_session: b + .persist_session + .unwrap_or(defaults.browser.persist_session), + close_policy: parse_close_policy(b.close_policy.as_deref()) + .unwrap_or(defaults.browser.close_policy), chrome_cache_dir: defaults.browser.chrome_cache_dir.clone(), }), mcp: match a.mcp { diff --git a/src/config/runtime.rs b/src/config/runtime.rs index e645d1a53..6700f82c1 100644 --- a/src/config/runtime.rs +++ b/src/config/runtime.rs @@ -9,6 +9,7 @@ use super::{ WarmupConfig, WarmupStatus, WorkReadiness, evaluate_work_readiness, }; use crate::llm::routing::RoutingConfig; +use crate::tools::browser::SharedBrowserHandle; /// Live configuration that can be hot-reloaded without restarting. /// @@ -64,6 +65,12 @@ pub struct RuntimeConfig { /// Wrapped in `Arc` so it can be shared with the `Sandbox` struct, which /// reads the current mode dynamically on every `wrap()` call. pub sandbox: Arc>, + /// Shared browser state for persistent sessions. + /// + /// When `browser.persist_session = true`, all workers share this handle so + /// the browser process and tabs survive across worker lifetimes. When + /// `persist_session = false` this is `None` and each worker creates its own. + pub shared_browser: Option, } impl RuntimeConfig { @@ -117,6 +124,11 @@ impl RuntimeConfig { settings: ArcSwap::from_pointee(None), secrets: ArcSwap::from_pointee(None), sandbox: Arc::new(ArcSwap::from_pointee(agent_config.sandbox.clone())), + shared_browser: if agent_config.browser.persist_session { + Some(crate::tools::browser::new_shared_browser_handle()) + } else { + None + }, } } @@ -188,6 +200,16 @@ impl RuntimeConfig { .store(Arc::new(resolved.max_concurrent_branches)); self.max_concurrent_workers .store(Arc::new(resolved.max_concurrent_workers)); + let old_persist = self.browser_config.load().persist_session; + let new_persist = resolved.browser.persist_session; + if old_persist != new_persist { + tracing::warn!( + agent_id, + old = old_persist, + new = new_persist, + "persist_session changed — restart the agent for this to take effect" + ); + } self.browser_config.store(Arc::new(resolved.browser)); self.mcp.store(Arc::new(new_mcp.clone())); self.history_backfill_count diff --git a/src/config/toml_schema.rs b/src/config/toml_schema.rs index 55e73b347..3dd3b898d 100644 --- a/src/config/toml_schema.rs +++ b/src/config/toml_schema.rs @@ -364,6 +364,8 @@ pub(super) struct TomlBrowserConfig { pub(super) evaluate_enabled: Option, pub(super) executable_path: Option, pub(super) screenshot_dir: Option, + pub(super) persist_session: Option, + pub(super) close_policy: Option, } #[derive(Deserialize)] diff --git a/src/config/types.rs b/src/config/types.rs index ddc15310a..18870bede 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -698,6 +698,35 @@ impl Default for IngestionConfig { } } +/// What happens when a worker finishes or explicitly calls "close" on the browser. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ClosePolicy { + /// Kill the browser process and reset all state (current default behavior). + #[default] + CloseBrowser, + /// Close all tabs but leave the browser process running. + CloseTabs, + /// Disconnect from the browser without touching tabs or the process. + Detach, +} + +impl ClosePolicy { + pub fn as_str(self) -> &'static str { + match self { + Self::CloseBrowser => "close_browser", + Self::CloseTabs => "close_tabs", + Self::Detach => "detach", + } + } +} + +impl std::fmt::Display for ClosePolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + /// Browser automation configuration for workers. #[derive(Debug, Clone)] pub struct BrowserConfig { @@ -711,6 +740,12 @@ pub struct BrowserConfig { pub executable_path: Option, /// Directory for storing screenshots and other browser artifacts. pub screenshot_dir: Option, + /// Keep the browser alive across worker lifetimes. When true, all workers + /// for this agent share a single browser connection and tabs survive between + /// worker runs. Cookies, localStorage, and login sessions persist. + pub persist_session: bool, + /// Controls what happens when a worker calls "close" or finishes. + pub close_policy: ClosePolicy, /// Directory for caching a fetcher-downloaded Chromium binary. /// Populated from `{instance_dir}/chrome_cache` during config resolution. pub chrome_cache_dir: PathBuf, @@ -724,6 +759,8 @@ impl Default for BrowserConfig { evaluate_enabled: false, executable_path: None, screenshot_dir: None, + persist_session: false, + close_policy: ClosePolicy::default(), chrome_cache_dir: PathBuf::from("chrome_cache"), } } diff --git a/src/main.rs b/src/main.rs index 6c5a68948..833becdb5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2815,6 +2815,7 @@ async fn initialize_agents( brave_search_key, agent.deps.runtime_config.workspace_dir.clone(), agent.deps.sandbox.clone(), + agent.deps.runtime_config.shared_browser.clone(), ); let store = spacebot::agent::cortex_chat::CortexChatStore::new(agent.db.sqlite.clone()); let session = spacebot::agent::cortex_chat::CortexChatSession::new( diff --git a/src/tools.rs b/src/tools.rs index 4275d1d2a..48bb7afa1 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -57,7 +57,7 @@ pub mod worker_inspect; pub use branch_tool::{BranchArgs, BranchError, BranchOutput, BranchTool}; pub use browser::{ ActKind, BrowserAction, BrowserArgs, BrowserError, BrowserOutput, BrowserTool, ElementSummary, - TabInfo, + SharedBrowserHandle, TabInfo, }; pub use cancel::{CancelArgs, CancelError, CancelOutput, CancelTool}; pub use channel_recall::{ @@ -432,7 +432,12 @@ pub fn create_worker_tool_server( } if browser_config.enabled { - server = server.tool(BrowserTool::new(browser_config, screenshot_dir)); + let browser_tool = if let Some(shared) = &runtime_config.shared_browser { + BrowserTool::new_shared(shared.clone(), browser_config, screenshot_dir) + } else { + BrowserTool::new(browser_config, screenshot_dir) + }; + server = server.tool(browser_tool); } if let Some(key) = brave_search_key { @@ -474,6 +479,7 @@ pub fn create_cortex_chat_tool_server( brave_search_key: Option, workspace: PathBuf, sandbox: Arc, + shared_browser: Option, ) -> ToolServerHandle { let mut server = ToolServer::new() .tool(MemorySaveTool::new(memory_search.clone())) @@ -493,7 +499,12 @@ pub fn create_cortex_chat_tool_server( .tool(ExecTool::new(workspace, sandbox)); if browser_config.enabled { - server = server.tool(BrowserTool::new(browser_config, screenshot_dir)); + let browser_tool = if let Some(shared) = shared_browser { + BrowserTool::new_shared(shared, browser_config, screenshot_dir) + } else { + BrowserTool::new(browser_config, screenshot_dir) + }; + server = server.tool(browser_tool); } if let Some(key) = brave_search_key { diff --git a/src/tools/browser.rs b/src/tools/browser.rs index 579c9b5c3..640b56be4 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -125,6 +125,17 @@ fn is_v4_mapped_blocked(ip: Ipv6Addr) -> bool { } } +/// Opaque handle to shared browser state that persists across worker lifetimes. +/// +/// Held by `RuntimeConfig` when `persist_session = true`. All workers for the +/// same agent clone this handle and share a single browser process / tab set. +pub type SharedBrowserHandle = Arc>; + +/// Create a new shared browser handle for use in `RuntimeConfig`. +pub fn new_shared_browser_handle() -> SharedBrowserHandle { + Arc::new(Mutex::new(BrowserState::new())) +} + /// Tool for browser automation (worker-only). #[derive(Debug, Clone)] pub struct BrowserTool { @@ -133,8 +144,12 @@ pub struct BrowserTool { screenshot_dir: PathBuf, } -/// Internal browser state managed across tool invocations within a single worker. -struct BrowserState { +/// Internal browser state managed across tool invocations. +/// +/// When `persist_session` is enabled this struct lives in `RuntimeConfig` (via +/// `SharedBrowserHandle`) and is shared across worker lifetimes. Otherwise each +/// `BrowserTool` owns its own instance. +pub struct BrowserState { browser: Option, /// Background task driving the CDP WebSocket handler. _handler_task: Option>, @@ -151,6 +166,20 @@ struct BrowserState { user_data_dir: Option, } +impl BrowserState { + fn new() -> Self { + Self { + browser: None, + _handler_task: None, + pages: HashMap::new(), + active_target: None, + element_refs: HashMap::new(), + next_ref: 0, + user_data_dir: None, + } + } +} + impl Drop for BrowserState { fn drop(&mut self) { // Browser and handler task are dropped automatically — @@ -200,17 +229,27 @@ struct ElementRef { } impl BrowserTool { + /// Create a tool with its own isolated browser state (default, non-persistent). pub fn new(config: BrowserConfig, screenshot_dir: PathBuf) -> Self { Self { - state: Arc::new(Mutex::new(BrowserState { - browser: None, - _handler_task: None, - pages: HashMap::new(), - active_target: None, - element_refs: HashMap::new(), - next_ref: 0, - user_data_dir: None, - })), + state: Arc::new(Mutex::new(BrowserState::new())), + config, + screenshot_dir, + } + } + + /// Create a tool backed by a shared browser state handle. + /// + /// Used when `persist_session = true`. Multiple workers share the same + /// `SharedBrowserHandle`, so the browser process and tabs survive across + /// worker lifetimes. + pub fn new_shared( + shared_state: SharedBrowserHandle, + config: BrowserConfig, + screenshot_dir: PathBuf, + ) -> Self { + Self { + state: shared_state, config, screenshot_dir, } @@ -496,11 +535,14 @@ impl Tool for BrowserTool { impl BrowserTool { async fn handle_launch(&self) -> Result { - // Quick check under the lock — don't hold it across the potentially - // long resolve + launch sequence. + // Quick check under the lock — if a browser is already running and + // we're in persistent mode, reconnect to existing tabs. { - let state = self.state.lock().await; + let mut state = self.state.lock().await; if state.browser.is_some() { + if self.config.persist_session { + return self.reconnect_existing_tabs(&mut state).await; + } return Ok(BrowserOutput::success("Browser already running")); } } @@ -551,7 +593,12 @@ impl BrowserTool { // Another call launched while we were downloading/starting. Clean up // the browser we just created and return success. drop(browser); + handler_task.abort(); let _ = std::fs::remove_dir_all(&user_data_dir); + + if self.config.persist_session { + return self.reconnect_existing_tabs(&mut state).await; + } return Ok(BrowserOutput::success("Browser already running")); } @@ -563,6 +610,71 @@ impl BrowserTool { Ok(BrowserOutput::success("Browser launched successfully")) } + /// Discover existing tabs from the browser and populate the page map. + /// + /// Called when a worker connects to an already-running persistent browser. + /// Returns tab info so the LLM knows what's already open. + async fn reconnect_existing_tabs( + &self, + state: &mut BrowserState, + ) -> Result { + let browser = state + .browser + .as_ref() + .ok_or_else(|| BrowserError::new("browser not launched"))?; + + let pages = browser.pages().await.map_err(|error| { + BrowserError::new(format!("failed to enumerate existing tabs: {error}")) + })?; + + let mut discovered = 0usize; + for page in pages { + let target_id = page_target_id(&page); + if !state.pages.contains_key(&target_id) { + state.pages.insert(target_id.clone(), page); + discovered += 1; + } + } + + // If there's no active target but we have pages, pick the first one. + if state.active_target.is_none() + && let Some(id) = state.pages.keys().next().cloned() + { + state.active_target = Some(id); + } + + let tab_count = state.pages.len(); + let mut tabs = Vec::with_capacity(tab_count); + for (id, page) in &state.pages { + let title = page.get_title().await.ok().flatten(); + let url = page.url().await.ok().flatten(); + let is_active = state.active_target.as_deref() == Some(id); + tabs.push(TabInfo { + target_id: id.clone(), + url, + title, + active: is_active, + }); + } + + tracing::info!(tab_count, discovered, "reconnected to persistent browser"); + + Ok(BrowserOutput { + success: true, + message: format!( + "Connected to persistent browser ({tab_count} tab{} open, {discovered} newly discovered)", + if tab_count == 1 { "" } else { "s" } + ), + url: None, + title: None, + elements: None, + tabs: Some(tabs), + screenshot_path: None, + eval_result: None, + content: None, + }) + } + async fn handle_navigate(&self, url: Option) -> Result { let Some(url) = url else { return Err(BrowserError::new("url is required for navigate action")); @@ -1011,33 +1123,71 @@ impl BrowserTool { } async fn handle_close(&self) -> Result { + use crate::config::ClosePolicy; + let mut state = self.state.lock().await; - if let Some(mut browser) = state.browser.take() - && let Err(error) = browser.close().await - { - tracing::warn!(%error, "browser close returned error"); - } + match self.config.close_policy { + ClosePolicy::Detach => { + // Just clear per-worker state (element refs, active target) + // but leave the browser, tabs, and handler alive. + state.element_refs.clear(); + state.next_ref = 0; + state.active_target = None; + tracing::info!(policy = "detach", "worker detached from browser"); + Ok(BrowserOutput::success( + "Detached from browser (tabs and session preserved)", + )) + } + ClosePolicy::CloseTabs => { + // Close all tracked tabs but keep the browser process alive. + for (id, page) in state.pages.drain() { + if let Err(error) = page.close().await { + tracing::debug!(target_id = %id, %error, "failed to close tab"); + } + } + state.active_target = None; + state.element_refs.clear(); + state.next_ref = 0; + tracing::info!( + policy = "close_tabs", + "closed all tabs, browser still running" + ); + Ok(BrowserOutput::success( + "All tabs closed (browser still running)", + )) + } + ClosePolicy::CloseBrowser => { + // Full teardown — kill the browser process and clean up everything. + if let Some(mut browser) = state.browser.take() + && let Err(error) = browser.close().await + { + tracing::warn!(%error, "browser close returned error"); + } - state.pages.clear(); - state.active_target = None; - state.element_refs.clear(); - state.next_ref = 0; - state._handler_task = None; + state.pages.clear(); + state.active_target = None; + state.element_refs.clear(); + state.next_ref = 0; + if let Some(task) = state._handler_task.take() { + task.abort(); + } - // Clean up the per-launch user data dir to free disk space. - if let Some(dir) = state.user_data_dir.take() - && let Err(error) = tokio::fs::remove_dir_all(&dir).await - { - tracing::debug!( - path = %dir.display(), - %error, - "failed to clean up browser user data dir" - ); - } + // Clean up the per-launch user data dir to free disk space. + if let Some(dir) = state.user_data_dir.take() + && let Err(error) = tokio::fs::remove_dir_all(&dir).await + { + tracing::debug!( + path = %dir.display(), + %error, + "failed to clean up browser user data dir" + ); + } - tracing::info!("browser closed"); - Ok(BrowserOutput::success("Browser closed")) + tracing::info!(policy = "close_browser", "browser closed"); + Ok(BrowserOutput::success("Browser closed")) + } + } } /// Get the active page, or create a first one if the browser has no pages yet. From 55d80f87da1e984f8eb40e308b4a080b76afe8e8 Mon Sep 17 00:00:00 2001 From: James Pine Date: Tue, 3 Mar 2026 23:54:15 -0800 Subject: [PATCH 2/2] fix: address review feedback on persistent browser PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - P1: Use typed ClosePolicy enum in BrowserUpdate API layer instead of raw String, so invalid values get a 400 error on deserialization rather than being silently persisted to config.toml. - P2: Rebuild tab map in reconnect_existing_tabs from browser.pages() instead of append-only, pruning stale entries. Validate active_target points to a live page after refresh. - P2: Reduce mutex hold time in handle_close — CloseTabs drains pages under the lock then closes them outside it; CloseBrowser takes state out under the lock then tears down outside it. - P3: Preserve active_target on Detach so next worker picks up exactly where the previous one left off (element_refs still cleared). - P3: Gate shared browser mode on both persist_session config flag AND shared_browser handle presence, not just handle existence. - P3: Fix doc inconsistency — 'single browser per worker' limitation now conditional on persist_session mode. Tighten ClosePolicy doc comment to only mention explicit close action. - P4: Use async tokio::fs::remove_dir_all in launch race path and CloseBrowser teardown to avoid blocking while holding the mutex. - Propagate close errors as BrowserError instead of swallowing them, so the LLM sees failures and can recover. --- docs/content/docs/(features)/browser.mdx | 4 +- src/api/config.rs | 5 +- src/config/types.rs | 2 +- src/tools.rs | 17 ++- src/tools/browser.rs | 137 +++++++++++++++-------- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/docs/content/docs/(features)/browser.mdx b/docs/content/docs/(features)/browser.mdx index b7a66fa02..4c299022f 100644 --- a/docs/content/docs/(features)/browser.mdx +++ b/docs/content/docs/(features)/browser.mdx @@ -153,7 +153,7 @@ When `enabled = false`, the browser tool is not registered on worker ToolServers ### Close Policy -Controls what happens when a worker calls `close` or finishes: +Controls what happens when a worker calls `close`: | Policy | Behavior | |--------|----------| @@ -228,5 +228,5 @@ Element resolution: refs map to CSS selectors built from `[role='...']` and `[ar - **No file upload/download** -- chromiumoxide doesn't expose file chooser interception. Use `shell` + `curl` for downloads. - **No network interception** -- request blocking and response modification aren't exposed through the tool, though chromiumoxide supports it via raw CDP commands. - **No cookie/storage management** -- not exposed as tool actions. Could be added if needed. -- **Single browser per worker** -- each worker gets one Chrome process. No connection pooling across workers. +- **Single browser per worker by default** -- with `persist_session = false` (default), each worker gets its own Chrome process. With `persist_session = true`, all workers for an agent share one browser and reconnect to existing tabs on `launch`. - **Selector fragility** -- the `[role][aria-label]` selector strategy works for well-structured pages but can fail on pages with missing ARIA attributes. The `content` action + `evaluate` (when enabled) serve as fallbacks. diff --git a/src/api/config.rs b/src/api/config.rs index 644095195..471a34a32 100644 --- a/src/api/config.rs +++ b/src/api/config.rs @@ -1,4 +1,5 @@ use super::state::ApiState; +use crate::config::ClosePolicy; use axum::Json; use axum::extract::{Query, State}; @@ -202,7 +203,7 @@ pub(super) struct BrowserUpdate { headless: Option, evaluate_enabled: Option, persist_session: Option, - close_policy: Option, + close_policy: Option, } #[derive(Deserialize, Debug)] @@ -681,7 +682,7 @@ fn update_browser_table( if let Some(v) = browser.persist_session { table["persist_session"] = toml_edit::value(v); } - if let Some(ref v) = browser.close_policy { + if let Some(v) = browser.close_policy { table["close_policy"] = toml_edit::value(v.as_str()); } Ok(()) diff --git a/src/config/types.rs b/src/config/types.rs index 18870bede..874db5361 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -698,7 +698,7 @@ impl Default for IngestionConfig { } } -/// What happens when a worker finishes or explicitly calls "close" on the browser. +/// What happens when a worker explicitly calls "close" on the browser. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum ClosePolicy { diff --git a/src/tools.rs b/src/tools.rs index 5cb47d57d..0a7296592 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -460,7 +460,11 @@ pub fn create_worker_tool_server( } if browser_config.enabled { - let browser_tool = if let Some(shared) = &runtime_config.shared_browser { + let browser_tool = if let Some(shared) = runtime_config + .shared_browser + .as_ref() + .filter(|_| browser_config.persist_session) + { BrowserTool::new_shared(shared.clone(), browser_config, screenshot_dir) } else { BrowserTool::new(browser_config, screenshot_dir) @@ -527,11 +531,12 @@ pub fn create_cortex_chat_tool_server( .tool(ExecTool::new(workspace, sandbox)); if browser_config.enabled { - let browser_tool = if let Some(shared) = shared_browser { - BrowserTool::new_shared(shared, browser_config, screenshot_dir) - } else { - BrowserTool::new(browser_config, screenshot_dir) - }; + let browser_tool = + if let Some(shared) = shared_browser.filter(|_| browser_config.persist_session) { + BrowserTool::new_shared(shared, browser_config, screenshot_dir) + } else { + BrowserTool::new(browser_config, screenshot_dir) + }; server = server.tool(browser_tool); } diff --git a/src/tools/browser.rs b/src/tools/browser.rs index 640b56be4..60fffd9a9 100644 --- a/src/tools/browser.rs +++ b/src/tools/browser.rs @@ -594,7 +594,12 @@ impl BrowserTool { // the browser we just created and return success. drop(browser); handler_task.abort(); - let _ = std::fs::remove_dir_all(&user_data_dir); + // Clean up the temp user data dir asynchronously so we don't block + // while holding the shared mutex. + let dir = user_data_dir; + tokio::spawn(async move { + let _ = tokio::fs::remove_dir_all(&dir).await; + }); if self.config.persist_session { return self.reconnect_existing_tabs(&mut state).await; @@ -610,10 +615,17 @@ impl BrowserTool { Ok(BrowserOutput::success("Browser launched successfully")) } - /// Discover existing tabs from the browser and populate the page map. + /// Discover existing tabs from the browser and rebuild the page map. /// /// Called when a worker connects to an already-running persistent browser. - /// Returns tab info so the LLM knows what's already open. + /// Rebuilds `state.pages` from `browser.pages()` so stale entries (tabs + /// closed externally) are pruned. Validates that `active_target` still + /// points to a live page. + /// + /// Note: holds the mutex across CDP calls. `Browser::pages()` and the + /// per-tab title/url queries are quick CDP round-trips, and concurrent + /// browser use during reconnect is rare (workers typically call `launch` + /// once at the start of their run). async fn reconnect_existing_tabs( &self, state: &mut BrowserState, @@ -627,20 +639,26 @@ impl BrowserTool { BrowserError::new(format!("failed to enumerate existing tabs: {error}")) })?; - let mut discovered = 0usize; + // Rebuild the page map from the live browser, pruning stale entries. + let previous_ids: std::collections::HashSet = state.pages.keys().cloned().collect(); + let mut refreshed_pages = HashMap::with_capacity(pages.len()); for page in pages { let target_id = page_target_id(&page); - if !state.pages.contains_key(&target_id) { - state.pages.insert(target_id.clone(), page); - discovered += 1; - } + refreshed_pages.insert(target_id, page); } - - // If there's no active target but we have pages, pick the first one. - if state.active_target.is_none() - && let Some(id) = state.pages.keys().next().cloned() - { - state.active_target = Some(id); + let discovered = refreshed_pages + .keys() + .filter(|id| !previous_ids.contains(*id)) + .count(); + state.pages = refreshed_pages; + + // Ensure active_target points to a valid page. + let active_valid = state + .active_target + .as_ref() + .is_some_and(|id| state.pages.contains_key(id)); + if !active_valid { + state.active_target = state.pages.keys().next().cloned(); } let tab_count = state.pages.len(); @@ -1125,30 +1143,48 @@ impl BrowserTool { async fn handle_close(&self) -> Result { use crate::config::ClosePolicy; - let mut state = self.state.lock().await; - match self.config.close_policy { ClosePolicy::Detach => { - // Just clear per-worker state (element refs, active target) - // but leave the browser, tabs, and handler alive. + let mut state = self.state.lock().await; + // Clear per-worker element refs but preserve tabs, browser, + // handler, and active_target so the next worker picks up + // exactly where this one left off. state.element_refs.clear(); state.next_ref = 0; - state.active_target = None; tracing::info!(policy = "detach", "worker detached from browser"); Ok(BrowserOutput::success( "Detached from browser (tabs and session preserved)", )) } ClosePolicy::CloseTabs => { - // Close all tracked tabs but keep the browser process alive. - for (id, page) in state.pages.drain() { + // Drain pages under the lock, then close them outside it so + // other workers aren't blocked by CDP round-trips. + let pages_to_close: Vec<(String, chromiumoxide::Page)> = { + let mut state = self.state.lock().await; + let pages = state.pages.drain().collect(); + state.active_target = None; + state.element_refs.clear(); + state.next_ref = 0; + pages + }; + + let mut close_errors = Vec::new(); + for (id, page) in pages_to_close { if let Err(error) = page.close().await { - tracing::debug!(target_id = %id, %error, "failed to close tab"); + close_errors.push(format!("{id}: {error}")); } } - state.active_target = None; - state.element_refs.clear(); - state.next_ref = 0; + + if !close_errors.is_empty() { + let message = format!( + "failed to close {} tab(s): {}", + close_errors.len(), + close_errors.join("; ") + ); + tracing::warn!(policy = "close_tabs", %message); + return Err(BrowserError::new(message)); + } + tracing::info!( policy = "close_tabs", "closed all tabs, browser still running" @@ -1158,30 +1194,43 @@ impl BrowserTool { )) } ClosePolicy::CloseBrowser => { - // Full teardown — kill the browser process and clean up everything. - if let Some(mut browser) = state.browser.take() - && let Err(error) = browser.close().await - { - tracing::warn!(%error, "browser close returned error"); - } + // Take everything out of state under the lock, then do the + // actual teardown outside it. + let (browser, handler_task, user_data_dir) = { + let mut state = self.state.lock().await; + let browser = state.browser.take(); + let handler_task = state._handler_task.take(); + let user_data_dir = state.user_data_dir.take(); + state.pages.clear(); + state.active_target = None; + state.element_refs.clear(); + state.next_ref = 0; + (browser, handler_task, user_data_dir) + }; - state.pages.clear(); - state.active_target = None; - state.element_refs.clear(); - state.next_ref = 0; - if let Some(task) = state._handler_task.take() { + if let Some(task) = handler_task { task.abort(); } - // Clean up the per-launch user data dir to free disk space. - if let Some(dir) = state.user_data_dir.take() - && let Err(error) = tokio::fs::remove_dir_all(&dir).await + if let Some(mut browser) = browser + && let Err(error) = browser.close().await { - tracing::debug!( - path = %dir.display(), - %error, - "failed to clean up browser user data dir" - ); + let message = format!("failed to close browser: {error}"); + tracing::warn!(policy = "close_browser", %message); + return Err(BrowserError::new(message)); + } + + // Clean up the per-launch user data dir to free disk space. + if let Some(dir) = user_data_dir { + tokio::spawn(async move { + if let Err(error) = tokio::fs::remove_dir_all(&dir).await { + tracing::debug!( + path = %dir.display(), + %error, + "failed to clean up browser user data dir" + ); + } + }); } tracing::info!(policy = "close_browser", "browser closed");