From b123bb5edcac6e963044f1ac4761bcff4248e0aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Fri, 24 Oct 2025 06:27:22 +0200 Subject: [PATCH 01/14] Implement conversation compaction --- crates/code_assistant/src/acp/agent.rs | 89 +++-- crates/code_assistant/src/acp/types.rs | 7 + crates/code_assistant/src/acp/ui.rs | 4 + .../code_assistant/src/agent/persistence.rs | 2 +- crates/code_assistant/src/agent/runner.rs | 351 ++++++++++++++++-- crates/code_assistant/src/agent/tests.rs | 145 ++++++++ crates/code_assistant/src/app/gpui.rs | 15 +- crates/code_assistant/src/persistence.rs | 45 ++- crates/code_assistant/src/session/manager.rs | 40 +- crates/code_assistant/src/tests/mocks.rs | 6 + crates/code_assistant/src/ui/backend.rs | 14 +- crates/code_assistant/src/ui/gpui/elements.rs | 132 ++++++- crates/code_assistant/src/ui/gpui/mod.rs | 8 + .../src/ui/streaming/caret_processor.rs | 9 +- .../src/ui/streaming/json_processor.rs | 5 + crates/code_assistant/src/ui/streaming/mod.rs | 2 + .../src/ui/streaming/test_utils.rs | 7 + .../src/ui/streaming/xml_processor.rs | 9 +- crates/code_assistant/src/ui/terminal/ui.rs | 3 + crates/llm/src/anthropic.rs | 16 +- crates/llm/src/display.rs | 3 + crates/llm/src/openai_responses.rs | 8 + crates/llm/src/provider_config.rs | 2 + crates/llm/src/types.rs | 25 ++ docs/context-compaction.md | 36 ++ models.example.json | 11 + 26 files changed, 908 insertions(+), 86 deletions(-) create mode 100644 docs/context-compaction.md diff --git a/crates/code_assistant/src/acp/agent.rs b/crates/code_assistant/src/acp/agent.rs index 5dcf40e1..feba37ec 100644 --- a/crates/code_assistant/src/acp/agent.rs +++ b/crates/code_assistant/src/acp/agent.rs @@ -1,4 +1,6 @@ use agent_client_protocol as acp; +#[allow(unused_imports)] +use anyhow::Context; use anyhow::Result; use serde_json::{json, Map as JsonMap, Value as JsonValue}; use std::collections::{HashMap, HashSet}; @@ -321,14 +323,20 @@ impl acp::Agent for ACPAgentImpl { let session_id = { let mut manager = session_manager.lock().await; + let session_model_config = SessionModelConfig::for_model(model_name.clone(), None) + .map_err(|e| { + tracing::error!( + error = ?e, + "ACP: Failed to load model configuration for {}", + model_name + ); + to_acp_error(&e) + })?; manager .create_session_with_config( None, Some(session_config), - Some(SessionModelConfig { - model_name: model_name.clone(), - record_path: None, - }), + Some(session_model_config), ) .map_err(|e| { tracing::error!("Failed to create session: {}", e); @@ -344,13 +352,18 @@ impl acp::Agent for ACPAgentImpl { { if model_info.selection_changed { let mut manager = session_manager.lock().await; - if let Err(err) = manager.set_session_model_config( - &session_id, - Some(SessionModelConfig { - model_name: model_info.selected_model_name.clone(), - record_path: None, - }), - ) { + let fallback_model_config = + SessionModelConfig::for_model(model_info.selected_model_name.clone(), None) + .map_err(|e| { + tracing::error!( + error = ?e, + "ACP: Failed to build fallback model config" + ); + to_acp_error(&e) + })?; + if let Err(err) = + manager.set_session_model_config(&session_id, Some(fallback_model_config)) + { tracing::error!( error = ?err, "ACP: Failed to persist fallback model selection for session {}", @@ -450,12 +463,21 @@ impl acp::Agent for ACPAgentImpl { .as_ref() .and_then(|config| config.record_path.clone()); let mut manager = session_manager.lock().await; + let fallback_model_config = SessionModelConfig::for_model( + model_info.selected_model_name.clone(), + record_path, + ) + .map_err(|e| { + tracing::error!( + error = ?e, + "ACP: Failed to build fallback model config when loading session {}", + arguments.session_id.0 + ); + to_acp_error(&e) + })?; if let Err(err) = manager.set_session_model_config( &arguments.session_id.0, - Some(SessionModelConfig { - model_name: model_info.selected_model_name.clone(), - record_path, - }), + Some(fallback_model_config), ) { tracing::error!( error = ?err, @@ -541,9 +563,19 @@ impl acp::Agent for ACPAgentImpl { let session_model_config = match config_result { Ok(Some(config)) => config, - Ok(None) => SessionModelConfig { - model_name: model_name.clone(), - record_path: None, + Ok(None) => match SessionModelConfig::for_model(model_name.clone(), None) { + Ok(config) => config, + Err(e) => { + let error_msg = format!( + "Failed to load default model configuration for session {}: {e}", + arguments.session_id.0 + ); + tracing::error!("{}", error_msg); + let mut uis = active_uis.lock().await; + uis.remove(arguments.session_id.0.as_ref()); + let err = e.context(error_msg); + return Err(to_acp_error(&err)); + } }, Err(e) => { let error_msg = format!( @@ -827,13 +859,22 @@ impl acp::Agent for ACPAgentImpl { { let mut manager = session_manager.lock().await; - if let Err(err) = manager.set_session_model_config( - &session_id.0, - Some(SessionModelConfig { - model_name: display_name.clone(), - record_path, - }), + let new_model_config = match SessionModelConfig::for_model( + display_name.clone(), + record_path.clone(), ) { + Ok(config) => config, + Err(err) => { + tracing::error!( + error = ?err, + "ACP: Failed to build session model configuration" + ); + return Err(to_acp_error(&err)); + } + }; + if let Err(err) = + manager.set_session_model_config(&session_id.0, Some(new_model_config)) + { tracing::error!( error = ?err, "ACP: Failed to persist session model selection" diff --git a/crates/code_assistant/src/acp/types.rs b/crates/code_assistant/src/acp/types.rs index a736fb6b..bb9b25e2 100644 --- a/crates/code_assistant/src/acp/types.rs +++ b/crates/code_assistant/src/acp/types.rs @@ -15,6 +15,13 @@ pub fn fragment_to_content_block(fragment: &DisplayFragment) -> acp::ContentBloc text: text.clone(), meta: None, }), + DisplayFragment::CompactionDivider { summary } => { + acp::ContentBlock::Text(acp::TextContent { + annotations: None, + text: format!("Conversation compacted:\n{summary}"), + meta: None, + }) + } DisplayFragment::Image { media_type, data } => { acp::ContentBlock::Image(acp::ImageContent { annotations: None, diff --git a/crates/code_assistant/src/acp/ui.rs b/crates/code_assistant/src/acp/ui.rs index ba3cb61f..0bbaab12 100644 --- a/crates/code_assistant/src/acp/ui.rs +++ b/crates/code_assistant/src/acp/ui.rs @@ -653,6 +653,10 @@ impl UserInterface for ACPUserUI { let content = fragment_to_content_block(fragment); self.queue_session_update(acp::SessionUpdate::AgentMessageChunk { content }); } + DisplayFragment::CompactionDivider { .. } => { + let content = fragment_to_content_block(fragment); + self.queue_session_update(acp::SessionUpdate::AgentMessageChunk { content }); + } DisplayFragment::ThinkingText(_) => { let content = fragment_to_content_block(fragment); self.queue_session_update(acp::SessionUpdate::AgentThoughtChunk { content }); diff --git a/crates/code_assistant/src/agent/persistence.rs b/crates/code_assistant/src/agent/persistence.rs index a40c8486..1f7218d5 100644 --- a/crates/code_assistant/src/agent/persistence.rs +++ b/crates/code_assistant/src/agent/persistence.rs @@ -118,7 +118,7 @@ impl FileStatePersistence { ); let json = std::fs::read_to_string(&self.state_file_path)?; let mut session: ChatSession = serde_json::from_str(&json)?; - session.ensure_config(); + session.ensure_config()?; info!( "Loaded agent state with {} messages", diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index eb6d3856..8aeeab8e 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -6,7 +6,10 @@ use crate::session::SessionConfig; use crate::tools::core::{ResourcesTracker, ToolContext, ToolRegistry, ToolScope}; use crate::tools::{generate_system_message, ParserRegistry, ToolRequest}; use crate::types::*; -use crate::ui::{UiEvent, UserInterface}; +use crate::ui::gpui::elements::MessageRole as UiMessageRole; +use crate::ui::streaming::create_stream_processor; +use crate::ui::ui_events::{MessageData, ToolResultData}; +use crate::ui::{ToolStatus, UiEvent, UserInterface}; use crate::utils::CommandExecutor; use anyhow::Result; use llm::{ @@ -73,6 +76,16 @@ pub struct Agent { pending_message_ref: Option>>>, } +const CONTEXT_USAGE_THRESHOLD: f32 = 0.8; +const CONTEXT_COMPACTION_PROMPT: &str = r#" +The conversation history is nearing the model's context window limit. Provide a thorough summary that allows resuming the task without the earlier messages. Include: +- The current objectives or tasks. +- Key actions taken so far and their outcomes. +- Important files, commands, or decisions that matter for continuing. +- Outstanding questions or follow-up work that still needs attention. +Respond with plain text only. +"#; + impl Agent { /// Formats an error, particularly ToolErrors, into a user-friendly string. fn format_error_for_user(error: &anyhow::Error) -> String { @@ -305,6 +318,11 @@ impl Agent { .await?; } + if self.should_trigger_compaction()? { + self.perform_compaction().await?; + continue; + } + let messages = self.render_tool_results_in_messages(); // 1. Get LLM response (without adding to history yet) @@ -415,12 +433,10 @@ impl Agent { } self.session_name = session_state.name; self.session_model_config = session_state.model_config; - if let Some(model_hint) = self - .session_model_config - .as_ref() - .map(|cfg| cfg.model_name.clone()) - { - self.set_model_hint(Some(model_hint)); + if let Some(model_config) = self.session_model_config.as_mut() { + model_config.ensure_context_limit()?; + let model_name = model_config.model_name.clone(); + self.set_model_hint(Some(model_name)); } // Restore next_request_id from session, or calculate from existing messages for backward compatibility @@ -784,12 +800,15 @@ impl Agent { .map(|msg| { match &msg.content { MessageContent::Structured(blocks) => { - // Check if there are any ToolResult blocks that need conversion + // Check if there are any ToolResult or CompactionSummary blocks that need conversion let has_tool_results = blocks .iter() .any(|block| matches!(block, ContentBlock::ToolResult { .. })); + let has_compaction_summary = blocks + .iter() + .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })); - if !has_tool_results { + if !has_tool_results && !has_compaction_summary { // No conversion needed return msg; } @@ -812,6 +831,12 @@ impl Agent { text_content.push_str(text); text_content.push_str("\n\n"); } + ContentBlock::CompactionSummary { text, .. } => { + let formatted = + Self::format_compaction_summary_for_prompt(text); + text_content.push_str(&formatted); + text_content.push_str("\n\n"); + } _ => {} // Ignore other block types } } @@ -1040,6 +1065,252 @@ impl Agent { Ok((response, request_id)) } + fn message_contains_compaction_summary(message: &Message) -> bool { + match &message.content { + MessageContent::Structured(blocks) => blocks + .iter() + .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })), + _ => false, + } + } + + fn format_compaction_summary_for_prompt(summary: &str) -> String { + let trimmed = summary.trim(); + if trimmed.is_empty() { + "Conversation summary: (empty)".to_string() + } else { + format!("Conversation summary:\n{trimmed}") + } + } + + fn extract_compaction_summary_text(blocks: &[ContentBlock]) -> String { + let mut collected = Vec::new(); + for block in blocks { + match block { + ContentBlock::CompactionSummary { text, .. } => collected.push(text.as_str()), + ContentBlock::Text { text, .. } => collected.push(text.as_str()), + ContentBlock::Thinking { thinking, .. } => { + collected.push(thinking.as_str()); + } + _ => {} + } + } + + let merged = collected.join("\n").trim().to_string(); + if merged.is_empty() { + "No summary was generated.".to_string() + } else { + merged + } + } + + fn build_ui_messages(&self) -> Result> { + struct DummyUI; + + #[async_trait::async_trait] + impl crate::ui::UserInterface for DummyUI { + async fn send_event( + &self, + _event: crate::ui::UiEvent, + ) -> Result<(), crate::ui::UIError> { + Ok(()) + } + + fn display_fragment( + &self, + _fragment: &crate::ui::DisplayFragment, + ) -> Result<(), crate::ui::UIError> { + Ok(()) + } + + fn should_streaming_continue(&self) -> bool { + true + } + + fn notify_rate_limit(&self, _seconds_remaining: u64) {} + + fn clear_rate_limit(&self) {} + + fn as_any(&self) -> &dyn std::any::Any { + self + } + } + + let dummy_ui: Arc = Arc::new(DummyUI); + let mut processor = create_stream_processor(self.session_config.tool_syntax, dummy_ui, 0); + + let mut messages_data = Vec::new(); + + for message in &self.message_history { + if message.role == MessageRole::User { + match &message.content { + MessageContent::Text(text) if text.trim().is_empty() => { + continue; + } + MessageContent::Structured(blocks) => { + let has_tool_results = blocks + .iter() + .any(|block| matches!(block, ContentBlock::ToolResult { .. })); + if has_tool_results { + continue; + } + } + _ => {} + } + } + + let fragments = processor.extract_fragments_from_message(message)?; + let role = match message.role { + MessageRole::User => UiMessageRole::User, + MessageRole::Assistant => UiMessageRole::Assistant, + }; + + messages_data.push(MessageData { role, fragments }); + } + + Ok(messages_data) + } + + fn build_ui_tool_results(&self) -> Result> { + let mut tool_results = Vec::new(); + let mut resources_tracker = ResourcesTracker::new(); + + for execution in &self.tool_executions { + let success = execution.result.is_success(); + let status = if success { + ToolStatus::Success + } else { + ToolStatus::Error + }; + + let short_output = execution.result.as_render().status(); + let output = execution.result.as_render().render(&mut resources_tracker); + + tool_results.push(ToolResultData { + tool_id: execution.tool_request.id.clone(), + status, + message: Some(short_output), + output: Some(output), + }); + } + + Ok(tool_results) + } + + async fn refresh_ui_after_compaction(&self) -> Result<()> { + if self.session_id.is_none() { + return Ok(()); + } + + let messages = self.build_ui_messages()?; + let tool_results = self.build_ui_tool_results()?; + + self.ui + .send_event(UiEvent::SetMessages { + messages, + session_id: self.session_id.clone(), + tool_results, + }) + .await?; + + Ok(()) + } + + #[cfg(test)] + pub fn set_test_session_metadata( + &mut self, + session_id: String, + model_config: SessionModelConfig, + ) { + self.session_id = Some(session_id); + self.session_model_config = Some(model_config); + } + + #[cfg(test)] + pub fn message_history_for_tests(&self) -> &Vec { + &self.message_history + } + + fn context_usage_ratio(&mut self) -> Result> { + let config = match self.session_model_config.as_mut() { + Some(config) => { + config.ensure_context_limit()?; + config + } + None => return Ok(None), + }; + + let limit = config.context_token_limit; + if limit == 0 { + return Ok(None); + } + + let last_assistant = self + .message_history + .iter() + .rev() + .find(|msg| matches!(msg.role, MessageRole::Assistant)); + + if let Some(message) = last_assistant { + if Self::message_contains_compaction_summary(message) { + return Ok(None); + } + + if let Some(usage) = &message.usage { + let used_tokens = usage.input_tokens + usage.cache_read_input_tokens; + if used_tokens == 0 { + return Ok(None); + } + return Ok(Some(used_tokens as f32 / limit as f32)); + } + } + + Ok(None) + } + + fn should_trigger_compaction(&mut self) -> Result { + if let Some(ratio) = self.context_usage_ratio()? { + if ratio >= CONTEXT_USAGE_THRESHOLD { + debug!( + "Context usage {:.1}% >= threshold {:.0}% — triggering compaction", + ratio * 100.0, + CONTEXT_USAGE_THRESHOLD * 100.0 + ); + return Ok(true); + } + } + Ok(false) + } + + async fn perform_compaction(&mut self) -> Result<()> { + debug!("Starting context compaction"); + + let compaction_message = Message { + role: MessageRole::User, + content: MessageContent::Text(CONTEXT_COMPACTION_PROMPT.to_string()), + request_id: None, + usage: None, + }; + self.append_message(compaction_message)?; + + let messages = self.render_tool_results_in_messages(); + let (response, request_id) = self.get_next_assistant_message(messages).await?; + + let summary_text = Self::extract_compaction_summary_text(&response.content); + let summary_block = ContentBlock::new_compaction_summary(summary_text.clone()); + let summary_message = Message { + role: MessageRole::Assistant, + content: MessageContent::Structured(vec![summary_block]), + request_id: Some(request_id), + usage: Some(response.usage.clone()), + }; + self.append_message(summary_message)?; + + self.refresh_ui_after_compaction().await?; + + Ok(()) + } + /// Prepare messages for LLM request, dynamically rendering tool outputs fn render_tool_results_in_messages(&self) -> Vec { // Start with a clean slate @@ -1059,8 +1330,14 @@ impl Agent { tool_outputs.insert(tool_use_id.clone(), rendered_output); } + let start_index = self + .message_history + .iter() + .rposition(|msg| Self::message_contains_compaction_summary(msg)) + .unwrap_or(0); + // Now rebuild the message history, replacing tool outputs with our dynamically rendered versions - for msg in &self.message_history { + for msg in self.message_history.iter().skip(start_index) { match &msg.content { MessageContent::Structured(blocks) => { // Look for ToolResult blocks @@ -1068,32 +1345,46 @@ impl Agent { let mut need_update = false; for block in blocks { - if let ContentBlock::ToolResult { - tool_use_id, - is_error, - start_time, - end_time, - .. - } = block - { - // If we have an execution result for this tool use, use it - if let Some(output) = tool_outputs.get(tool_use_id) { - // Create a new ToolResult with updated content - new_blocks.push(ContentBlock::ToolResult { - tool_use_id: tool_use_id.clone(), - content: output.clone(), - is_error: *is_error, + match block { + ContentBlock::ToolResult { + tool_use_id, + is_error, + start_time, + end_time, + .. + } => { + // If we have an execution result for this tool use, use it + if let Some(output) = tool_outputs.get(tool_use_id) { + // Create a new ToolResult with updated content + new_blocks.push(ContentBlock::ToolResult { + tool_use_id: tool_use_id.clone(), + content: output.clone(), + is_error: *is_error, + start_time: *start_time, + end_time: *end_time, + }); + need_update = true; + } else { + // Keep the original block + new_blocks.push(block.clone()); + } + } + ContentBlock::CompactionSummary { + text, + start_time, + end_time, + } => { + new_blocks.push(ContentBlock::Text { + text: Self::format_compaction_summary_for_prompt(text), start_time: *start_time, end_time: *end_time, }); need_update = true; - } else { - // Keep the original block + } + _ => { + // Keep other blocks as is new_blocks.push(block.clone()); } - } else { - // Keep non-ToolResult blocks as is - new_blocks.push(block.clone()); } } diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index 9b39ace7..c2824dda 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -1,5 +1,6 @@ use super::*; use crate::agent::persistence::MockStatePersistence; +use crate::persistence::SessionModelConfig; use crate::session::SessionConfig; use crate::tests::mocks::MockLLMProvider; use crate::tests::mocks::{ @@ -8,6 +9,7 @@ use crate::tests::mocks::{ }; use crate::tests::utils::parse_and_truncate_llm_response; use crate::types::*; +use crate::ui::UiEvent; use anyhow::Result; use llm::types::*; use std::path::PathBuf; @@ -736,6 +738,149 @@ async fn test_parse_error_handling() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_context_compaction_inserts_summary() -> Result<()> { + let summary_text = "Summary of recent work"; + let summary_response = LLMResponse { + content: vec![ContentBlock::new_text(summary_text)], + usage: Usage { + input_tokens: 20, + output_tokens: 8, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + rate_limit_info: None, + }; + + let idle_response = LLMResponse { + content: Vec::new(), + usage: Usage::zero(), + rate_limit_info: None, + }; + + let mock_llm = MockLLMProvider::new(vec![Ok(idle_response), Ok(summary_response)]); + let mock_llm_ref = mock_llm.clone(); + + let ui = Arc::new(MockUI::default()); + + let components = AgentComponents { + llm_provider: Box::new(mock_llm), + project_manager: Box::new(MockProjectManager::new()), + command_executor: Box::new(create_command_executor_mock()), + ui: ui.clone(), + state_persistence: Box::new(MockStatePersistence::new()), + }; + + let session_config = SessionConfig { + init_path: Some(PathBuf::from("./test_path")), + initial_project: String::new(), + tool_syntax: ToolSyntax::Native, + use_diff_blocks: false, + }; + + let mut agent = Agent::new(components, session_config); + agent.disable_naming_reminders(); + agent.set_test_session_metadata( + "session-1".to_string(), + SessionModelConfig { + model_name: "test-model".to_string(), + record_path: None, + context_token_limit: 100, + }, + ); + + agent.append_message(Message { + role: MessageRole::User, + content: MessageContent::Text("User request".to_string()), + request_id: None, + usage: None, + })?; + + agent.append_message(Message { + role: MessageRole::Assistant, + content: MessageContent::Structured(vec![ContentBlock::new_text( + "Assistant reply".to_string(), + )]), + request_id: Some(1), + usage: Some(Usage { + input_tokens: 85, + output_tokens: 12, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }), + })?; + + agent.run_single_iteration().await?; + + // Ensure a compaction summary message was added + let summary_message = + agent + .message_history_for_tests() + .iter() + .find(|message| match &message.content { + MessageContent::Structured(blocks) => blocks + .iter() + .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })), + _ => false, + }); + assert!( + summary_message.is_some(), + "Expected compaction summary in history" + ); + + // The compaction prompt should have been sent to the provider + let requests = mock_llm_ref.get_requests(); + assert_eq!(requests.len(), 2); + let compaction_request = &requests[0]; + let compaction_prompt_found = compaction_request.messages.iter().any(|message| { + matches!(&message.content, MessageContent::Text(text) if text.contains("system-compaction")) + }); + assert!( + compaction_prompt_found, + "Expected compaction prompt in LLM request" + ); + + // Ensure the UI received a SetMessages event with the compaction divider + let events = ui.events(); + let set_messages_event = events.iter().find_map(|event| { + if let UiEvent::SetMessages { messages, .. } = event { + Some(messages.clone()) + } else { + None + } + }); + let messages = set_messages_event.expect("Expected SetMessages event after compaction"); + let has_compaction_fragment = messages.iter().any(|message| { + message.fragments.iter().any(|fragment| { + matches!( + fragment, + crate::ui::DisplayFragment::CompactionDivider { .. } + ) + }) + }); + assert!( + has_compaction_fragment, + "Expected compaction divider fragment in UI messages" + ); + + // Subsequent prompt should include the summary content + let summary_in_followup = + requests[1].messages.iter().any(|message| { + match &message.content { + MessageContent::Structured(blocks) => blocks.iter().any(|block| { + matches!(block, ContentBlock::Text { text, .. } if text.contains(summary_text)) + }), + MessageContent::Text(text) => text.contains(summary_text), + } + }); + assert!( + summary_in_followup, + "Expected summary text in follow-up request" + ); + + Ok(()) +} + #[test] fn test_ui_filtering_with_failed_tool_messages() -> Result<()> { use crate::persistence::ChatSession; diff --git a/crates/code_assistant/src/app/gpui.rs b/crates/code_assistant/src/app/gpui.rs index fb4f0c09..58ad4312 100644 --- a/crates/code_assistant/src/app/gpui.rs +++ b/crates/code_assistant/src/app/gpui.rs @@ -28,6 +28,8 @@ pub fn run(config: AgentRunConfig) -> Result<()> { }; let default_model = config.model.clone(); + let base_session_model_config = + crate::persistence::SessionModelConfig::for_model(default_model.clone(), None)?; // Create the new SessionManager let multi_session_manager = Arc::new(Mutex::new(SessionManager::new( @@ -40,6 +42,7 @@ pub fn run(config: AgentRunConfig) -> Result<()> { let gui_for_thread = gui.clone(); let task_clone = config.task.clone(); let model = default_model; + let base_model_config = base_session_model_config.clone(); let record = config.record.clone(); let playback = config.playback.clone(); let fast_playback = config.fast_playback; @@ -53,10 +56,8 @@ pub fn run(config: AgentRunConfig) -> Result<()> { // Task provided - create new session and start agent debug!("Creating initial session with task: {}", initial_task); - let session_model_config = crate::persistence::SessionModelConfig { - model_name: model.clone(), - record_path: record.clone(), - }; + let mut session_model_config = base_model_config.clone(); + session_model_config.record_path = record.clone(); let session_id = { let mut manager = multi_session_manager.lock().await; @@ -144,10 +145,8 @@ pub fn run(config: AgentRunConfig) -> Result<()> { // Create a new session automatically let new_session_id = { - let model_config = crate::persistence::SessionModelConfig { - model_name: model.clone(), - record_path: record.clone(), - }; + let mut model_config = base_model_config.clone(); + model_config.record_path = record.clone(); let mut manager = multi_session_manager.lock().await; manager diff --git a/crates/code_assistant/src/persistence.rs b/crates/code_assistant/src/persistence.rs index 2bc154d6..dd05101f 100644 --- a/crates/code_assistant/src/persistence.rs +++ b/crates/code_assistant/src/persistence.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{anyhow, Result}; use llm::Message; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -18,6 +18,9 @@ pub struct SessionModelConfig { pub model_name: String, /// Optional recording path for this session pub record_path: Option, + /// Maximum context window supported by the model (token count) + #[serde(default)] + pub context_token_limit: u32, // Note: playback and fast_playback are runtime toggles, not persisted } @@ -76,7 +79,7 @@ pub struct ChatSession { impl ChatSession { /// Merge any legacy top-level fields into the nested SessionConfig. - pub fn ensure_config(&mut self) { + pub fn ensure_config(&mut self) -> Result<()> { if let Some(init_path) = self.legacy_init_path.take() { self.config.init_path = Some(init_path); } @@ -91,6 +94,10 @@ impl ChatSession { if let Some(use_diff_blocks) = self.legacy_use_diff_blocks.take() { self.config.use_diff_blocks = use_diff_blocks; } + if let Some(model_config) = self.model_config.as_mut() { + model_config.ensure_context_limit()?; + } + Ok(()) } /// Create a new empty chat session using the provided configuration. @@ -120,6 +127,36 @@ impl ChatSession { } } +impl SessionModelConfig { + /// Construct a session model configuration by looking up the model metadata. + pub fn for_model(model_name: String, record_path: Option) -> Result { + let config_system = llm::provider_config::ConfigurationSystem::load()?; + let limit = config_system + .get_model(&model_name) + .map(|model| model.context_token_limit) + .ok_or_else(|| anyhow!("Model not found in models.json: {}", model_name))?; + + Ok(Self { + model_name, + record_path, + context_token_limit: limit, + }) + } + + /// Ensure the context token limit is populated (for legacy session files). + pub fn ensure_context_limit(&mut self) -> Result<()> { + if self.context_token_limit == 0 { + let config_system = llm::provider_config::ConfigurationSystem::load()?; + let limit = config_system + .get_model(&self.model_name) + .map(|model| model.context_token_limit) + .ok_or_else(|| anyhow!("Model not found in models.json: {}", self.model_name))?; + self.context_token_limit = limit; + } + Ok(()) + } +} + /// A helper to obtain the tool syntax for this session without exposing legacy fields. impl ChatSession { pub fn tool_syntax(&self) -> ToolSyntax { @@ -199,7 +236,7 @@ impl FileSessionPersistence { pub fn save_chat_session(&mut self, session: &ChatSession) -> Result<()> { let mut session = session.clone(); - session.ensure_config(); + session.ensure_config()?; let session_path = self.chat_file_path(&session.id)?; debug!("Saving chat session to {}", session_path.display()); @@ -253,7 +290,7 @@ impl FileSessionPersistence { debug!("Loading chat session from {}", session_path.display()); let json = std::fs::read_to_string(session_path)?; let mut session: ChatSession = serde_json::from_str(&json)?; - session.ensure_config(); + session.ensure_config()?; Ok(Some(session)) } diff --git a/crates/code_assistant/src/session/manager.rs b/crates/code_assistant/src/session/manager.rs index a18bde36..0d7af47b 100644 --- a/crates/code_assistant/src/session/manager.rs +++ b/crates/code_assistant/src/session/manager.rs @@ -56,16 +56,13 @@ impl SessionManager { /// Create a new session and return its ID pub fn create_session(&mut self, name: Option) -> Result { // Always create sessions with a default model config - let default_model_config = self.default_model_config(); + let default_model_config = self.default_model_config()?; self.create_session_with_config(name, None, Some(default_model_config)) } /// Get the default model configuration - fn default_model_config(&self) -> SessionModelConfig { - SessionModelConfig { - model_name: self.default_model_name.clone(), - record_path: None, - } + fn default_model_config(&self) -> Result { + SessionModelConfig::for_model(self.default_model_name.clone(), None) } /// Create a new session with optional model config and return its ID @@ -163,11 +160,11 @@ impl SessionManager { if let Some(model_name) = existing_model_name { model_name } else { - let default_model_config = self.default_model_config(); + let default_model_config = self.default_model_config()?; let model_name = default_model_config.model_name.clone(); { let session_instance = self.active_sessions.get_mut(&session_id).unwrap(); - session_instance.session.model_config = Some(default_model_config); + session_instance.session.model_config = Some(default_model_config.clone()); } needs_persist = true; model_name @@ -419,11 +416,23 @@ impl SessionManager { /// Get the model config for a session, if any pub fn get_session_model_config(&self, session_id: &str) -> Result> { if let Some(instance) = self.active_sessions.get(session_id) { - Ok(instance.session.model_config.clone()) + if let Some(mut config) = instance.session.model_config.clone() { + config.ensure_context_limit()?; + Ok(Some(config)) + } else { + Ok(None) + } } else { // Load from persistence match self.persistence.load_chat_session(session_id)? { - Some(session) => Ok(session.model_config), + Some(mut session) => { + if let Some(mut config) = session.model_config.take() { + config.ensure_context_limit()?; + Ok(Some(config)) + } else { + Ok(None) + } + } None => Ok(None), } } @@ -440,11 +449,18 @@ impl SessionManager { .load_chat_session(session_id)? .ok_or_else(|| anyhow::anyhow!("Session not found: {session_id}"))?; - session.model_config = model_config.clone(); + let sanitized_model_config = if let Some(mut config) = model_config.clone() { + config.ensure_context_limit()?; + Some(config) + } else { + None + }; + + session.model_config = sanitized_model_config.clone(); self.persistence.save_chat_session(&session)?; if let Some(instance) = self.active_sessions.get_mut(session_id) { - instance.session.model_config = model_config; + instance.session.model_config = sanitized_model_config; } Ok(()) diff --git a/crates/code_assistant/src/tests/mocks.rs b/crates/code_assistant/src/tests/mocks.rs index a6a4fa21..c9e668a6 100644 --- a/crates/code_assistant/src/tests/mocks.rs +++ b/crates/code_assistant/src/tests/mocks.rs @@ -282,6 +282,12 @@ impl UserInterface for MockUI { .unwrap() .push("\n• Reasoning Complete".to_string()); } + crate::ui::DisplayFragment::CompactionDivider { summary } => { + self.streaming + .lock() + .unwrap() + .push(format!("[compaction] {summary}")); + } } Ok(()) } diff --git a/crates/code_assistant/src/ui/backend.rs b/crates/code_assistant/src/ui/backend.rs index cf042f7f..2786df60 100644 --- a/crates/code_assistant/src/ui/backend.rs +++ b/crates/code_assistant/src/ui/backend.rs @@ -454,9 +454,17 @@ async fn handle_switch_model( ); // Create new session model config - let new_model_config = SessionModelConfig { - model_name: model_name.to_string(), - record_path: None, // Keep existing recording path if any + let new_model_config = match SessionModelConfig::for_model(model_name.to_string(), None) { + Ok(config) => config, + Err(e) => { + error!( + "Failed to load model configuration for {}: {}", + model_name, e + ); + return BackendResponse::Error { + message: format!("Failed to load model configuration: {e}"), + }; + } }; let result = { diff --git a/crates/code_assistant/src/ui/gpui/elements.rs b/crates/code_assistant/src/ui/gpui/elements.rs index 49b4f57c..a62387e3 100644 --- a/crates/code_assistant/src/ui/gpui/elements.rs +++ b/crates/code_assistant/src/ui/gpui/elements.rs @@ -147,6 +147,20 @@ impl MessageContainer { cx.notify(); } + pub fn add_compaction_divider(&self, summary: impl Into, cx: &mut Context) { + self.finish_any_thinking_blocks(cx); + + let request_id = *self.current_request_id.lock().unwrap(); + let mut elements = self.elements.lock().unwrap(); + let block = BlockData::CompactionSummary(CompactionSummaryBlock { + summary: summary.into(), + is_expanded: false, + }); + let view = cx.new(|cx| BlockView::new(block, request_id, self.current_project.clone(), cx)); + elements.push(view); + cx.notify(); + } + // Add a new thinking block #[allow(dead_code)] pub fn add_thinking_block(&self, content: impl Into, cx: &mut Context) { @@ -613,6 +627,7 @@ pub enum BlockData { ThinkingBlock(ThinkingBlock), ToolUse(ToolUseBlock), ImageBlock(ImageBlock), + CompactionSummary(CompactionSummaryBlock), } impl BlockData { @@ -636,6 +651,13 @@ impl BlockData { _ => None, } } + + fn as_compaction_mut(&mut self) -> Option<&mut CompactionSummaryBlock> { + match self { + BlockData::CompactionSummary(b) => Some(b), + _ => None, + } + } } /// Entity view for a block @@ -684,7 +706,8 @@ impl BlockView { match &self.block { BlockData::ToolUse(_) => !self.is_generating, // Tools can't toggle while generating BlockData::ThinkingBlock(_) => true, // Thinking blocks can always toggle - _ => false, // Other blocks don't have expansion + BlockData::CompactionSummary(_) => true, + _ => false, // Other blocks don't have expansion } } @@ -723,6 +746,13 @@ impl BlockView { self.start_expand_collapse_animation(should_expand, cx); } + fn toggle_compaction(&mut self, cx: &mut Context) { + if let Some(summary) = self.block.as_compaction_mut() { + summary.is_expanded = !summary.is_expanded; + cx.notify(); + } + } + fn start_expand_collapse_animation(&mut self, should_expand: bool, cx: &mut Context) { let target = if should_expand { 1.0 } else { 0.0 }; let now = std::time::Instant::now(); @@ -1482,6 +1512,100 @@ impl Render for BlockView { ]) .into_any_element() } + BlockData::CompactionSummary(block) => { + let icon = file_icons::get().get_type_icon(file_icons::MESSAGE_BUBBLES); + let icon_color = cx.theme().info; + let toggle_label = if block.is_expanded { + "Hide summary" + } else { + "Show summary" + }; + + let header = div() + .flex() + .flex_row() + .items_center() + .justify_between() + .children(vec![ + div() + .flex() + .flex_row() + .items_center() + .gap_2() + .children(vec![ + file_icons::render_icon_container(&icon, 18.0, icon_color, "ℹ️") + .into_any_element(), + div() + .text_sm() + .font_weight(FontWeight(600.0)) + .text_color(icon_color) + .child("Conversation compacted") + .into_any_element(), + ]) + .into_any_element(), + div() + .text_sm() + .text_color(cx.theme().link) + .cursor_pointer() + .on_mouse_up( + MouseButton::Left, + cx.listener(|view, _event, _window, cx| { + view.toggle_compaction(cx); + }), + ) + .child(toggle_label) + .into_any_element(), + ]) + .into_any_element(); + + let mut children = vec![header]; + + if block.is_expanded { + children.push( + div() + .text_color(cx.theme().foreground) + .child( + TextView::markdown( + "compaction-summary", + block.summary.clone(), + window, + cx, + ) + .selectable(), + ) + .into_any_element(), + ); + } else { + let preview_text = block.summary.trim(); + if !preview_text.is_empty() { + let first_line = preview_text.lines().next().unwrap_or(""); + let truncated = if first_line.len() > 120 { + format!("{}…", &first_line[..120]) + } else { + first_line.to_string() + }; + children.push( + div() + .text_sm() + .text_color(cx.theme().muted_foreground) + .child(truncated) + .into_any_element(), + ); + } + } + + div() + .rounded_md() + .border_1() + .border_color(cx.theme().border) + .bg(cx.theme().popover) + .p_3() + .flex() + .flex_col() + .gap_2() + .children(children) + .into_any_element() + } BlockData::ImageBlock(block) => { if let Some(image) = &block.image { // Render the actual image - margins/spacing handled by parent container @@ -1540,6 +1664,12 @@ pub struct TextBlock { pub content: String, } +#[derive(Debug, Clone)] +pub struct CompactionSummaryBlock { + pub summary: String, + pub is_expanded: bool, +} + /// Thinking text block with collapsible content #[derive(Debug, Clone)] pub struct ThinkingBlock { diff --git a/crates/code_assistant/src/ui/gpui/mod.rs b/crates/code_assistant/src/ui/gpui/mod.rs index db058c0b..91e4a9d7 100644 --- a/crates/code_assistant/src/ui/gpui/mod.rs +++ b/crates/code_assistant/src/ui/gpui/mod.rs @@ -1013,6 +1013,11 @@ impl Gpui { container.add_image_block(media_type, data, cx); }); } + DisplayFragment::CompactionDivider { summary } => { + self.update_container(container, cx, |container, cx| { + container.add_compaction_divider(summary.clone(), cx); + }); + } DisplayFragment::ReasoningSummaryStart => { self.update_container(container, cx, |container, cx| { container.start_reasoning_summary_item(cx); @@ -1409,6 +1414,9 @@ impl UserInterface for Gpui { "GPUI: Tool {tool_id} attached terminal {terminal_id}; no dedicated UI hook" ); } + DisplayFragment::CompactionDivider { .. } => { + // Compaction summaries are handled via refresh events rather than streaming. + } DisplayFragment::ReasoningComplete => { self.push_event(UiEvent::CompleteReasoning); } diff --git a/crates/code_assistant/src/ui/streaming/caret_processor.rs b/crates/code_assistant/src/ui/streaming/caret_processor.rs index 0c690e50..b9a6f415 100644 --- a/crates/code_assistant/src/ui/streaming/caret_processor.rs +++ b/crates/code_assistant/src/ui/streaming/caret_processor.rs @@ -236,6 +236,11 @@ impl StreamProcessorTrait for CaretStreamProcessor { data: data.clone(), }); } + llm::ContentBlock::CompactionSummary { text, .. } => { + fragments.push(DisplayFragment::CompactionDivider { + summary: text.clone(), + }); + } } } } @@ -880,7 +885,9 @@ impl CaretStreamProcessor { // Tool parameter - emit immediately (we've already decided to allow the tool) self.ui.display_fragment(&fragment)?; } - DisplayFragment::PlainText(_) | DisplayFragment::ThinkingText(_) => { + DisplayFragment::PlainText(_) + | DisplayFragment::ThinkingText(_) + | DisplayFragment::CompactionDivider { .. } => { // Text or thinking - buffer it until we know if next tool is allowed if let StreamingState::BufferingAfterTool { buffered_fragments, .. diff --git a/crates/code_assistant/src/ui/streaming/json_processor.rs b/crates/code_assistant/src/ui/streaming/json_processor.rs index 26496b24..110eaed0 100644 --- a/crates/code_assistant/src/ui/streaming/json_processor.rs +++ b/crates/code_assistant/src/ui/streaming/json_processor.rs @@ -333,6 +333,11 @@ impl StreamProcessorTrait for JsonStreamProcessor { data: data.clone(), }); } + ContentBlock::CompactionSummary { text, .. } => { + fragments.push(DisplayFragment::CompactionDivider { + summary: text.clone(), + }); + } } } } diff --git a/crates/code_assistant/src/ui/streaming/mod.rs b/crates/code_assistant/src/ui/streaming/mod.rs index c68e3599..906179e4 100644 --- a/crates/code_assistant/src/ui/streaming/mod.rs +++ b/crates/code_assistant/src/ui/streaming/mod.rs @@ -51,6 +51,8 @@ pub enum DisplayFragment { ReasoningSummaryDelta(String), /// Mark reasoning as completed ReasoningComplete, + /// Divider indicating the conversation was compacted, with expandable summary text + CompactionDivider { summary: String }, } /// Common trait for stream processors diff --git a/crates/code_assistant/src/ui/streaming/test_utils.rs b/crates/code_assistant/src/ui/streaming/test_utils.rs index 9fdb7480..2213e969 100644 --- a/crates/code_assistant/src/ui/streaming/test_utils.rs +++ b/crates/code_assistant/src/ui/streaming/test_utils.rs @@ -168,6 +168,9 @@ pub fn print_fragments(fragments: &[DisplayFragment]) { terminal_id, } => println!(" [{i}] ToolTerminal(tool_id: {tool_id}, terminal_id: {terminal_id})"), DisplayFragment::ReasoningComplete => println!(" [{i}] ReasoningComplete"), + DisplayFragment::CompactionDivider { summary } => { + println!(" [{i}] CompactionDivider: {summary}"); + } } } } @@ -219,6 +222,10 @@ pub fn fragments_match(expected: &DisplayFragment, actual: &DisplayFragment) -> .. }, ) => expected_terminal == actual_terminal, + ( + DisplayFragment::CompactionDivider { summary: expected }, + DisplayFragment::CompactionDivider { summary: actual }, + ) => expected == actual, _ => false, } } diff --git a/crates/code_assistant/src/ui/streaming/xml_processor.rs b/crates/code_assistant/src/ui/streaming/xml_processor.rs index 8efa6187..cf4352a0 100644 --- a/crates/code_assistant/src/ui/streaming/xml_processor.rs +++ b/crates/code_assistant/src/ui/streaming/xml_processor.rs @@ -265,6 +265,11 @@ impl StreamProcessorTrait for XmlStreamProcessor { data: data.clone(), }); } + ContentBlock::CompactionSummary { text, .. } => { + fragments.push(DisplayFragment::CompactionDivider { + summary: text.clone(), + }); + } } } } @@ -716,7 +721,9 @@ impl XmlStreamProcessor { // Tool parameter - emit immediately (we've already decided to allow the tool) self.ui.display_fragment(&fragment)?; } - DisplayFragment::PlainText(_) | DisplayFragment::ThinkingText(_) => { + DisplayFragment::PlainText(_) + | DisplayFragment::ThinkingText(_) + | DisplayFragment::CompactionDivider { .. } => { // Text or thinking - buffer it if let StreamingState::BufferingAfterTool { buffered_fragments, .. diff --git a/crates/code_assistant/src/ui/terminal/ui.rs b/crates/code_assistant/src/ui/terminal/ui.rs index 49b96c7d..3680183e 100644 --- a/crates/code_assistant/src/ui/terminal/ui.rs +++ b/crates/code_assistant/src/ui/terminal/ui.rs @@ -422,6 +422,9 @@ impl UserInterface for TerminalTuiUI { "Tool {tool_id} attached client terminal {terminal_id}; terminal UI has no live view" ); } + DisplayFragment::CompactionDivider { .. } => { + // Compaction summaries are surfaced via refresh events in terminal UI. + } DisplayFragment::ReasoningComplete => { // For terminal UI, no specific action needed for reasoning completion } diff --git a/crates/llm/src/anthropic.rs b/crates/llm/src/anthropic.rs index 258f668d..292e3467 100644 --- a/crates/llm/src/anthropic.rs +++ b/crates/llm/src/anthropic.rs @@ -187,6 +187,11 @@ impl DefaultMessageConverter { AnthropicBlockContent::RedactedThinking { data }, false, ), + ContentBlock::CompactionSummary { text, .. } => ( + "text".to_string(), + AnthropicBlockContent::Text { text }, + true, + ), }; let cache_control = @@ -1138,6 +1143,10 @@ impl AnthropicClient { ContentBlock::ToolResult { end_time, .. } => { *end_time = Some(now); } + ContentBlock::CompactionSummary { text, end_time, .. } => { + *text = current_content.clone(); + *end_time = Some(now); + } } } _ => {} @@ -1188,6 +1197,10 @@ impl AnthropicClient { } *end_time = Some(now); } + ContentBlock::CompactionSummary { text, end_time, .. } => { + *text = current_content.clone(); + *end_time = Some(now); + } _ => {} } } @@ -1220,7 +1233,8 @@ impl AnthropicClient { | ContentBlock::Text { end_time, .. } | ContentBlock::Image { end_time, .. } | ContentBlock::ToolUse { end_time, .. } - | ContentBlock::ToolResult { end_time, .. } => { + | ContentBlock::ToolResult { end_time, .. } + | ContentBlock::CompactionSummary { end_time, .. } => { if end_time.is_none() { *end_time = Some(now); } diff --git a/crates/llm/src/display.rs b/crates/llm/src/display.rs index 55142ce5..f3704bd3 100644 --- a/crates/llm/src/display.rs +++ b/crates/llm/src/display.rs @@ -65,6 +65,9 @@ impl fmt::Display for ContentBlock { writeln!(f, "RedactedThinking")?; writeln!(f, " Data: {}", data.replace('\n', "\n ")) } + ContentBlock::CompactionSummary { text, .. } => { + writeln!(f, "CompactionSummary: {}", text.replace('\n', "\n ")) + } } } } diff --git a/crates/llm/src/openai_responses.rs b/crates/llm/src/openai_responses.rs index 177662bd..4175b09d 100644 --- a/crates/llm/src/openai_responses.rs +++ b/crates/llm/src/openai_responses.rs @@ -468,6 +468,14 @@ impl OpenAIResponsesClient { .push(ResponseContentItem::OutputText { text: thinking }); } }, + ContentBlock::CompactionSummary { text, .. } => match role { + MessageRole::User => { + current_message_content.push(ResponseContentItem::InputText { text }); + } + MessageRole::Assistant => { + current_message_content.push(ResponseContentItem::OutputText { text }); + } + }, // Non-message content blocks: flush current message and add as separate items ContentBlock::ToolResult { tool_use_id, diff --git a/crates/llm/src/provider_config.rs b/crates/llm/src/provider_config.rs index 24751d7f..4b7c28d6 100644 --- a/crates/llm/src/provider_config.rs +++ b/crates/llm/src/provider_config.rs @@ -26,6 +26,8 @@ pub struct ModelConfig { pub id: String, /// Model-specific configuration pub config: serde_json::Value, + /// Maximum context window supported by the model (token count) + pub context_token_limit: u32, } /// Configuration for all models (model_display_name -> ModelConfig) diff --git a/crates/llm/src/types.rs b/crates/llm/src/types.rs index e684cfca..4996b30c 100644 --- a/crates/llm/src/types.rs +++ b/crates/llm/src/types.rs @@ -162,6 +162,14 @@ pub enum ContentBlock { #[serde(skip_serializing_if = "Option::is_none")] end_time: Option, }, + #[serde(rename = "compaction_summary")] + CompactionSummary { + text: String, + #[serde(skip_serializing_if = "Option::is_none")] + start_time: Option, + #[serde(skip_serializing_if = "Option::is_none")] + end_time: Option, + }, } /// Rate limit information extracted from LLM provider headers @@ -307,6 +315,15 @@ impl ContentBlock { } } + /// Create a compaction summary block from a String + pub fn new_compaction_summary(text: impl Into) -> Self { + ContentBlock::CompactionSummary { + text: text.into(), + start_time: None, + end_time: None, + } + } + /// Create an image content block from raw image data pub fn new_image(media_type: impl Into, data: impl AsRef<[u8]>) -> Self { use base64::Engine as _; @@ -374,6 +391,7 @@ impl ContentBlock { ContentBlock::Image { start_time, .. } => *start_time, ContentBlock::ToolUse { start_time, .. } => *start_time, ContentBlock::ToolResult { start_time, .. } => *start_time, + ContentBlock::CompactionSummary { start_time, .. } => *start_time, } } @@ -386,6 +404,7 @@ impl ContentBlock { ContentBlock::Image { end_time, .. } => *end_time, ContentBlock::ToolUse { end_time, .. } => *end_time, ContentBlock::ToolResult { end_time, .. } => *end_time, + ContentBlock::CompactionSummary { end_time, .. } => *end_time, } } @@ -398,6 +417,7 @@ impl ContentBlock { ContentBlock::Image { start_time, .. } => *start_time = Some(time), ContentBlock::ToolUse { start_time, .. } => *start_time = Some(time), ContentBlock::ToolResult { start_time, .. } => *start_time = Some(time), + ContentBlock::CompactionSummary { start_time, .. } => *start_time = Some(time), } self } @@ -411,6 +431,7 @@ impl ContentBlock { ContentBlock::Image { end_time, .. } => *end_time = Some(time), ContentBlock::ToolUse { end_time, .. } => *end_time = Some(time), ContentBlock::ToolResult { end_time, .. } => *end_time = Some(time), + ContentBlock::CompactionSummary { end_time, .. } => *end_time = Some(time), } self } @@ -519,6 +540,10 @@ impl ContentBlock { .. }, ) => a_id == b_id && a_content == b_content && a_error == b_error, + ( + ContentBlock::CompactionSummary { text: a_text, .. }, + ContentBlock::CompactionSummary { text: b_text, .. }, + ) => a_text == b_text, _ => false, // Different variants } } diff --git a/docs/context-compaction.md b/docs/context-compaction.md new file mode 100644 index 00000000..44e24eec --- /dev/null +++ b/docs/context-compaction.md @@ -0,0 +1,36 @@ +# Context Compaction Implementation Plan + +This document outlines the phased approach for adding automatic context compaction to the agent loop. The goal is to proactively summarize long conversations when the active model’s context window nears capacity, keep the UI history intact, and continue prompting the LLM with only the most recent summarized state. + +## Phase 1 – Configuration & Data Model +- Require a `context_token_limit` field in every model entry in `models.json`. +- Update the configuration loader (`crates/llm/src/provider_config.rs`) and validation logic to deserialize, store, and surface this limit. +- Propagate the limit into `SessionModelConfig` (`crates/code_assistant/src/persistence.rs`) and ensure session creation (`crates/code_assistant/src/session/manager.rs`) records it. +- Introduce `ContentBlock::CompactionSummary { text: String }` in `crates/llm/src/types.rs` and adjust serialization, deserialization, and any exhaustive `match` arms that enumerate block variants. +- **Tests:** extend existing configuration loading tests (or add new ones) to assert `context_token_limit` is required and correctly parsed; add coverage verifying the new content block round-trips through serialization. + +## Phase 2 – Agent Compaction Logic +- Add helpers in `crates/code_assistant/src/agent/runner.rs` to read the context limit, calculate the percent of the window consumed based on the latest assistant `Usage`, and define a compaction threshold (e.g., 80%). +- Before building an `LLMRequest` in `run_single_iteration`, detect when the threshold is exceeded. +- When triggered, inject a system-authored prompt requesting a detailed summary, send it to the LLM, and store the response as an assistant message containing `ContentBlock::CompactionSummary`. +- Adjust the message-preparation path (`render_tool_results_in_messages` and any related helpers) so the next LLM request only includes messages from the last compaction summary onward, while keeping the full `message_history` for persistence and UI. +- **Tests:** add unit coverage to assert the compaction branch fires when expected, the summary block is stored correctly, and filtering logic feeds only post-summary messages to the provider. + +## Phase 3 – Persistence & Reload +- Ensure `ChatSession` serialization (`crates/code_assistant/src/persistence.rs`) handles the new summary block without data loss. +- Verify session loading (`Agent::load_from_session_state`) and `SessionInstance::convert_messages_to_ui_data` (`crates/code_assistant/src/session/instance.rs`) keep summaries visible while still allowing the agent to trim the prompt correctly. +- **Tests:** add persistence round-trip tests (if absent) that include a compaction summary and confirm reload semantics remain consistent. + +## Phase 4 – UI Presentation +- Extend `DisplayFragment` with `CompactionDivider` in `crates/code_assistant/src/ui/streaming/mod.rs`. +- Update stream processors (`json_processor.rs`, `xml_processor.rs`, `caret_processor.rs`) to emit the divider fragment for `ContentBlock::CompactionSummary`. +- Enhance GPUI components: + - Add a collapsible divider block in `crates/code_assistant/src/ui/gpui/elements.rs` showing the “conversation compacted” banner and the summary text. + - Ensure `MessagesView` (`crates/code_assistant/src/ui/gpui/messages.rs`) handles the fragment, including expand/collapse state management. +- **Tests:** add GPUI/component tests (or logic tests where available) validating the divider renders, defaults to collapsed, and expands to reveal the summary. + +## Phase 5 – Validation & Follow-Up +- Run formatting (`cargo fmt`), linting (`cargo clippy` once re-enabled), and targeted test suites (`cargo test` with focus on updated modules). +- Add or update documentation references pointing to this file if needed. +- **Tests:** confirm the new automated tests pass and consider adding integration coverage that simulates a full compaction cycle end-to-end. + diff --git a/models.example.json b/models.example.json index 00c805bf..f4b5004a 100644 --- a/models.example.json +++ b/models.example.json @@ -2,6 +2,7 @@ "Claude Sonnet 4.5": { "provider": "anthropic-main", "id": "claude-sonnet-4-5", + "context_token_limit": 200000, "config": { "max_tokens": 32768, "thinking": { @@ -13,6 +14,7 @@ "GPT-5-Codex (low)": { "provider": "openai-responses", "id": "gpt-5-codex", + "context_token_limit": 128000, "config": { "temperature": 0.7, "reasoning": { @@ -24,6 +26,7 @@ "GPT-5-Codex (medium)": { "provider": "openai-responses", "id": "gpt-5-codex", + "context_token_limit": 128000, "config": { "temperature": 0.7, "reasoning": { @@ -35,6 +38,7 @@ "GPT-4.1": { "provider": "openai-main", "id": "gpt-4.1", + "context_token_limit": 128000, "config": { "temperature": 0.8, "max_tokens": 4096 @@ -43,6 +47,7 @@ "Llama 3.2 8B": { "provider": "ollama-local", "id": "llama3.3:8b", + "context_token_limit": 16384, "config": { "options": { "num_ctx": 16384, @@ -53,6 +58,7 @@ "Qwen 3 Coder 480B": { "provider": "groq-main", "id": "qwen-3-coder-480b", + "context_token_limit": 32768, "config": { "temperature": 0.7, "top_p": 0.8 @@ -61,6 +67,7 @@ "Moonshot Kimi K2": { "provider": "groq-main", "id": "moonshotai/kimi-k2-instruct", + "context_token_limit": 32768, "config": { "temperature": 0.6 } @@ -68,6 +75,7 @@ "Cerebras GPT OSS 120B": { "provider": "cerebras-main", "id": "gpt-oss-120b", + "context_token_limit": 16384, "config": { "temperature": 0.7 } @@ -75,6 +83,7 @@ "Mistral Devstral Medium": { "provider": "mistral-main", "id": "devstral-medium-2507", + "context_token_limit": 200000, "config": { "temperature": 0.7 } @@ -82,6 +91,7 @@ "Gemini 2.5 Pro": { "provider": "vertex-main", "id": "gemini-2.5-pro-preview-06-05", + "context_token_limit": 200000, "config": { "temperature": 0.8 } @@ -89,6 +99,7 @@ "AI Core Claude Sonnet 4": { "provider": "ai-core-dev", "id": "claude-sonnet-4", + "context_token_limit": 200000, "config": {} } } From 09f727d5a1ff3321422f99f2c78ae082ae0819c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 25 Oct 2025 01:33:16 +0200 Subject: [PATCH 02/14] Fix clippy warnings --- crates/code_assistant/src/agent/runner.rs | 2 +- crates/code_assistant/src/persistence.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index 8aeeab8e..7334c166 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -1333,7 +1333,7 @@ impl Agent { let start_index = self .message_history .iter() - .rposition(|msg| Self::message_contains_compaction_summary(msg)) + .rposition(Self::message_contains_compaction_summary) .unwrap_or(0); // Now rebuild the message history, replacing tool outputs with our dynamically rendered versions diff --git a/crates/code_assistant/src/persistence.rs b/crates/code_assistant/src/persistence.rs index dd05101f..13fd4e68 100644 --- a/crates/code_assistant/src/persistence.rs +++ b/crates/code_assistant/src/persistence.rs @@ -134,7 +134,7 @@ impl SessionModelConfig { let limit = config_system .get_model(&model_name) .map(|model| model.context_token_limit) - .ok_or_else(|| anyhow!("Model not found in models.json: {}", model_name))?; + .ok_or_else(|| anyhow!("Model not found in models.json: {model_name}"))?; Ok(Self { model_name, From ba9179dac4564ad5f94ae3aa337708904c76a730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 25 Oct 2025 01:39:03 +0200 Subject: [PATCH 03/14] Persist final codex message --- docs/context-compaction.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/context-compaction.md b/docs/context-compaction.md index 44e24eec..b1da6838 100644 --- a/docs/context-compaction.md +++ b/docs/context-compaction.md @@ -34,3 +34,13 @@ This document outlines the phased approach for adding automatic context compacti - Add or update documentation references pointing to this file if needed. - **Tests:** confirm the new automated tests pass and consider adding integration coverage that simulates a full compaction cycle end-to-end. +## Outcome + +─ Worked for 1h 18m 34s ────────────────────────────────────────────────────────────────────────────────────────────────── + +• - Added automatic compaction support: when prior assistant usage crosses the configured context window threshold, we now inject a summary request, persist the result as ContentBlock::CompactionSummary, and refresh the UI so a collapsible “conversation compacted” banner appears (crates/code_assistant/src/agent/runner.rs#L1239, crates/code_assistant/src/ui/gpui/elements.rs#L1488, crates/code_assistant/src/ui/gpui/mod.rs#L1016). + - Introduced context_token_limit to the model pipeline, wiring it through config loading (crates/llm/src/provider_config.rs#L26), session persistence (crates/code_assistant/src/persistence.rs#L16), manager creation (crates/code_assistant/src/session/manager.rs#L60), and ensuring legacy sessions hydrate the limit via SessionModelConfig::ensure_context_limit. + - Updated streaming/terminal/ACP surfaces to understand the new compaction summary block so replay, terminal, and ACP conversions all remain consistent (see crates/llm/src/anthropic.rs#L170, crates/code_assistant/src/ui/streaming/json_processor.rs#L311, crates/code_assistant/src/ui/terminal/ui.rs#L420, crates/code_assistant/src/acp/ui.rs#L130). + - Documented the rollout as a phased plan in docs/context-compaction.md and added focused unit coverage (crates/code_assistant/src/agent/tests.rs#L780) validating summary insertion and UI refresh. + + Tests: cargo test test_context_compaction_inserts_summary --tests (passes). Full cargo test is blocked in this sandbox by OS permission checks (integration test test_tool_limit_with_realistic_anthropic_chunks). From 1b1c448bcbf107f063e760abf3cee4af58179680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sun, 26 Oct 2025 17:50:36 +0100 Subject: [PATCH 04/14] Remove per-session context_token_limit and record_path fields SessionModelConfig no longer persists context_token_limit or record_path. Context window size is now always loaded from the provider config at runtime. Recording/playback paths are passed as runtime options instead of being stored in session files. Updates all affected code paths and tests. --- crates/code_assistant/src/acp/agent.rs | 34 ++++++--------- crates/code_assistant/src/agent/runner.rs | 28 +++++++++---- crates/code_assistant/src/agent/tests.rs | 7 +--- crates/code_assistant/src/app/gpui.rs | 29 +++++++------ crates/code_assistant/src/persistence.rs | 44 ++++++++------------ crates/code_assistant/src/session/manager.rs | 23 +++------- crates/code_assistant/src/ui/backend.rs | 18 ++++++-- crates/code_assistant/src/ui/terminal/app.rs | 10 ++++- crates/llm/src/factory.rs | 28 +++++++++---- docs/context-compaction.md | 2 +- docs/model-selection.md | 2 +- 11 files changed, 119 insertions(+), 106 deletions(-) diff --git a/crates/code_assistant/src/acp/agent.rs b/crates/code_assistant/src/acp/agent.rs index feba37ec..94c58f7a 100644 --- a/crates/code_assistant/src/acp/agent.rs +++ b/crates/code_assistant/src/acp/agent.rs @@ -323,7 +323,7 @@ impl acp::Agent for ACPAgentImpl { let session_id = { let mut manager = session_manager.lock().await; - let session_model_config = SessionModelConfig::for_model(model_name.clone(), None) + let session_model_config = SessionModelConfig::for_model(model_name.clone()) .map_err(|e| { tracing::error!( error = ?e, @@ -353,7 +353,7 @@ impl acp::Agent for ACPAgentImpl { if model_info.selection_changed { let mut manager = session_manager.lock().await; let fallback_model_config = - SessionModelConfig::for_model(model_info.selected_model_name.clone(), None) + SessionModelConfig::for_model(model_info.selected_model_name.clone()) .map_err(|e| { tracing::error!( error = ?e, @@ -459,13 +459,9 @@ impl acp::Agent for ACPAgentImpl { .map(|config| config.model_name.as_str()), ) { if model_info.selection_changed { - let record_path = stored_model_config - .as_ref() - .and_then(|config| config.record_path.clone()); let mut manager = session_manager.lock().await; let fallback_model_config = SessionModelConfig::for_model( model_info.selected_model_name.clone(), - record_path, ) .map_err(|e| { tracing::error!( @@ -563,7 +559,7 @@ impl acp::Agent for ACPAgentImpl { let session_model_config = match config_result { Ok(Some(config)) => config, - Ok(None) => match SessionModelConfig::for_model(model_name.clone(), None) { + Ok(None) => match SessionModelConfig::for_model(model_name.clone()) { Ok(config) => config, Err(e) => { let error_msg = format!( @@ -597,6 +593,7 @@ impl acp::Agent for ACPAgentImpl { &model_name_for_prompt, playback_path, fast_playback, + None, ) .await { @@ -845,24 +842,17 @@ impl acp::Agent for ACPAgentImpl { manager.get_session_model_config(&session_id.0) }; - let record_path = match existing_config { - Ok(Some(config)) => config.record_path, - Ok(None) => None, - Err(err) => { - tracing::error!( - error = ?err, - "ACP: Failed to read existing session model configuration" - ); - return Err(acp::Error::internal_error()); - } - }; + if let Err(err) = existing_config { + tracing::error!( + error = ?err, + "ACP: Failed to read existing session model configuration" + ); + return Err(acp::Error::internal_error()); + } { let mut manager = session_manager.lock().await; - let new_model_config = match SessionModelConfig::for_model( - display_name.clone(), - record_path.clone(), - ) { + let new_model_config = match SessionModelConfig::for_model(display_name.clone()) { Ok(config) => config, Err(err) => { tracing::error!( diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index 7334c166..a425c1c6 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -64,6 +64,8 @@ pub struct Agent { model_hint: Option, // Model configuration associated with this session session_model_config: Option, + // Optional override for the model's context window (primarily used in tests) + context_limit_override: Option, // Counter for generating unique request IDs next_request_id: u64, // Session ID for this agent instance @@ -127,6 +129,7 @@ impl Agent { tool_executions: Vec::new(), cached_system_prompts: HashMap::new(), session_model_config: None, + context_limit_override: None, next_request_id: 1, // Start from 1 session_id: None, session_name: String::new(), @@ -433,8 +436,8 @@ impl Agent { } self.session_name = session_state.name; self.session_model_config = session_state.model_config; + self.context_limit_override = None; if let Some(model_config) = self.session_model_config.as_mut() { - model_config.ensure_context_limit()?; let model_name = model_config.model_name.clone(); self.set_model_hint(Some(model_name)); } @@ -1226,21 +1229,32 @@ impl Agent { self.session_model_config = Some(model_config); } + #[cfg(test)] + pub fn set_test_context_limit(&mut self, limit: u32) { + self.context_limit_override = Some(limit); + } + #[cfg(test)] pub fn message_history_for_tests(&self) -> &Vec { &self.message_history } fn context_usage_ratio(&mut self) -> Result> { - let config = match self.session_model_config.as_mut() { - Some(config) => { - config.ensure_context_limit()?; - config - } + let model_name = match self.session_model_config.as_ref() { + Some(config) => config.model_name.clone(), None => return Ok(None), }; - let limit = config.context_token_limit; + let limit = if let Some(limit) = self.context_limit_override { + limit + } else { + let config_system = llm::provider_config::ConfigurationSystem::load()?; + config_system + .get_model(&model_name) + .map(|model| model.context_token_limit) + .ok_or_else(|| anyhow::anyhow!("Model not found in models.json: {model_name}"))? + }; + if limit == 0 { return Ok(None); } diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index c2824dda..29b3bdca 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -782,12 +782,9 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { agent.disable_naming_reminders(); agent.set_test_session_metadata( "session-1".to_string(), - SessionModelConfig { - model_name: "test-model".to_string(), - record_path: None, - context_token_limit: 100, - }, + SessionModelConfig::new_for_tests("test-model".to_string()), ); + agent.set_test_context_limit(100); agent.append_message(Message { role: MessageRole::User, diff --git a/crates/code_assistant/src/app/gpui.rs b/crates/code_assistant/src/app/gpui.rs index 58ad4312..f54a9d1c 100644 --- a/crates/code_assistant/src/app/gpui.rs +++ b/crates/code_assistant/src/app/gpui.rs @@ -29,7 +29,7 @@ pub fn run(config: AgentRunConfig) -> Result<()> { let default_model = config.model.clone(); let base_session_model_config = - crate::persistence::SessionModelConfig::for_model(default_model.clone(), None)?; + crate::persistence::SessionModelConfig::for_model(default_model.clone())?; // Create the new SessionManager let multi_session_manager = Arc::new(Mutex::new(SessionManager::new( @@ -56,13 +56,10 @@ pub fn run(config: AgentRunConfig) -> Result<()> { // Task provided - create new session and start agent debug!("Creating initial session with task: {}", initial_task); - let mut session_model_config = base_model_config.clone(); - session_model_config.record_path = record.clone(); - let session_id = { let mut manager = multi_session_manager.lock().await; manager - .create_session_with_config(None, None, Some(session_model_config.clone())) + .create_session_with_config(None, None, Some(base_model_config.clone())) .unwrap() }; @@ -91,10 +88,14 @@ pub fn run(config: AgentRunConfig) -> Result<()> { let user_interface: Arc = Arc::new(gui_for_thread.clone()); - let llm_client = - create_llm_client_from_model(&model, playback.clone(), fast_playback) - .await - .expect("Failed to create LLM client"); + let llm_client = create_llm_client_from_model( + &model, + playback.clone(), + fast_playback, + record.clone(), + ) + .await + .expect("Failed to create LLM client"); { let mut manager = multi_session_manager.lock().await; @@ -145,12 +146,9 @@ pub fn run(config: AgentRunConfig) -> Result<()> { // Create a new session automatically let new_session_id = { - let mut model_config = base_model_config.clone(); - model_config.record_path = record.clone(); - let mut manager = multi_session_manager.lock().await; manager - .create_session_with_config(None, None, Some(model_config)) + .create_session_with_config(None, None, Some(base_model_config.clone())) .unwrap_or_else(|e| { error!("Failed to create new session: {}", e); // Return a fallback session ID if creation fails @@ -186,6 +184,11 @@ pub fn run(config: AgentRunConfig) -> Result<()> { backend_event_rx, backend_response_tx, multi_session_manager, + Arc::new(crate::ui::backend::BackendRuntimeOptions { + record_path: record.clone(), + playback_path: playback.clone(), + fast_playback, + }), Arc::new(gui_for_thread) as Arc, ) .await; diff --git a/crates/code_assistant/src/persistence.rs b/crates/code_assistant/src/persistence.rs index 13fd4e68..29e5af6b 100644 --- a/crates/code_assistant/src/persistence.rs +++ b/crates/code_assistant/src/persistence.rs @@ -16,12 +16,12 @@ use crate::types::{PlanState, ToolSyntax, WorkingMemory}; pub struct SessionModelConfig { /// Display name of the model from models.json pub model_name: String, - /// Optional recording path for this session - pub record_path: Option, - /// Maximum context window supported by the model (token count) - #[serde(default)] - pub context_token_limit: u32, - // Note: playback and fast_playback are runtime toggles, not persisted + /// Legacy recording path persisted in older session files (ignored at runtime) + #[serde(default, rename = "record_path", skip_serializing)] + _legacy_record_path: Option, + /// Legacy context token limit persisted in older session files (ignored at runtime) + #[serde(default, rename = "context_token_limit", skip_serializing)] + _legacy_context_token_limit: Option, } /// A complete chat session with all its data @@ -94,9 +94,6 @@ impl ChatSession { if let Some(use_diff_blocks) = self.legacy_use_diff_blocks.take() { self.config.use_diff_blocks = use_diff_blocks; } - if let Some(model_config) = self.model_config.as_mut() { - model_config.ensure_context_limit()?; - } Ok(()) } @@ -129,31 +126,26 @@ impl ChatSession { impl SessionModelConfig { /// Construct a session model configuration by looking up the model metadata. - pub fn for_model(model_name: String, record_path: Option) -> Result { + pub fn for_model(model_name: String) -> Result { let config_system = llm::provider_config::ConfigurationSystem::load()?; - let limit = config_system - .get_model(&model_name) - .map(|model| model.context_token_limit) - .ok_or_else(|| anyhow!("Model not found in models.json: {model_name}"))?; + if config_system.get_model(&model_name).is_none() { + return Err(anyhow!("Model not found in models.json: {model_name}")); + } Ok(Self { model_name, - record_path, - context_token_limit: limit, + _legacy_record_path: None, + _legacy_context_token_limit: None, }) } - /// Ensure the context token limit is populated (for legacy session files). - pub fn ensure_context_limit(&mut self) -> Result<()> { - if self.context_token_limit == 0 { - let config_system = llm::provider_config::ConfigurationSystem::load()?; - let limit = config_system - .get_model(&self.model_name) - .map(|model| model.context_token_limit) - .ok_or_else(|| anyhow!("Model not found in models.json: {}", self.model_name))?; - self.context_token_limit = limit; + #[cfg(test)] + pub fn new_for_tests(model_name: String) -> Self { + Self { + model_name, + _legacy_record_path: None, + _legacy_context_token_limit: None, } - Ok(()) } } diff --git a/crates/code_assistant/src/session/manager.rs b/crates/code_assistant/src/session/manager.rs index 0d7af47b..7f4adfea 100644 --- a/crates/code_assistant/src/session/manager.rs +++ b/crates/code_assistant/src/session/manager.rs @@ -62,7 +62,7 @@ impl SessionManager { /// Get the default model configuration fn default_model_config(&self) -> Result { - SessionModelConfig::for_model(self.default_model_name.clone(), None) + SessionModelConfig::for_model(self.default_model_name.clone()) } /// Create a new session with optional model config and return its ID @@ -416,18 +416,12 @@ impl SessionManager { /// Get the model config for a session, if any pub fn get_session_model_config(&self, session_id: &str) -> Result> { if let Some(instance) = self.active_sessions.get(session_id) { - if let Some(mut config) = instance.session.model_config.clone() { - config.ensure_context_limit()?; - Ok(Some(config)) - } else { - Ok(None) - } + Ok(instance.session.model_config.clone()) } else { // Load from persistence match self.persistence.load_chat_session(session_id)? { Some(mut session) => { - if let Some(mut config) = session.model_config.take() { - config.ensure_context_limit()?; + if let Some(config) = session.model_config.take() { Ok(Some(config)) } else { Ok(None) @@ -449,18 +443,11 @@ impl SessionManager { .load_chat_session(session_id)? .ok_or_else(|| anyhow::anyhow!("Session not found: {session_id}"))?; - let sanitized_model_config = if let Some(mut config) = model_config.clone() { - config.ensure_context_limit()?; - Some(config) - } else { - None - }; - - session.model_config = sanitized_model_config.clone(); + session.model_config = model_config.clone(); self.persistence.save_chat_session(&session)?; if let Some(instance) = self.active_sessions.get_mut(session_id) { - instance.session.model_config = sanitized_model_config; + instance.session.model_config = model_config; } Ok(()) diff --git a/crates/code_assistant/src/ui/backend.rs b/crates/code_assistant/src/ui/backend.rs index 2786df60..23504d5b 100644 --- a/crates/code_assistant/src/ui/backend.rs +++ b/crates/code_assistant/src/ui/backend.rs @@ -5,6 +5,7 @@ use crate::ui::UserInterface; use crate::utils::{content::content_blocks_from, DefaultCommandExecutor}; use llm::factory::create_llm_client_from_model; +use std::path::PathBuf; use std::sync::Arc; use tokio::sync::Mutex; use tracing::{debug, error, info, trace}; @@ -77,10 +78,18 @@ pub enum BackendResponse { }, } +#[derive(Debug, Clone)] +pub struct BackendRuntimeOptions { + pub record_path: Option, + pub playback_path: Option, + pub fast_playback: bool, +} + pub async fn handle_backend_events( backend_event_rx: async_channel::Receiver, backend_response_tx: async_channel::Sender, multi_session_manager: Arc>, + runtime_options: Arc, ui: Arc, ) { debug!("Backend event handler started"); @@ -113,6 +122,7 @@ pub async fn handle_backend_events( &session_id, &message, &attachments, + runtime_options.as_ref(), &ui, ) .await @@ -264,6 +274,7 @@ async fn handle_send_user_message( session_id: &str, message: &str, attachments: &[DraftAttachment], + runtime_options: &BackendRuntimeOptions, ui: &Arc, ) -> Option { debug!( @@ -304,8 +315,9 @@ async fn handle_send_user_message( // Use session's stored model create_llm_client_from_model( &session_config.model_name, - session_config.record_path.clone(), - false, + runtime_options.playback_path.clone(), + runtime_options.fast_playback, + runtime_options.record_path.clone(), ) .await } else { @@ -454,7 +466,7 @@ async fn handle_switch_model( ); // Create new session model config - let new_model_config = match SessionModelConfig::for_model(model_name.to_string(), None) { + let new_model_config = match SessionModelConfig::for_model(model_name.to_string()) { Ok(config) => config, Err(e) => { error!( diff --git a/crates/code_assistant/src/ui/terminal/app.rs b/crates/code_assistant/src/ui/terminal/app.rs index 2561e782..3d0699c0 100644 --- a/crates/code_assistant/src/ui/terminal/app.rs +++ b/crates/code_assistant/src/ui/terminal/app.rs @@ -2,7 +2,9 @@ use crate::app::AgentRunConfig; use crate::persistence::FileSessionPersistence; use crate::session::manager::SessionManager; use crate::session::SessionConfig; -use crate::ui::backend::{handle_backend_events, BackendEvent, BackendResponse}; +use crate::ui::backend::{ + handle_backend_events, BackendEvent, BackendResponse, BackendRuntimeOptions, +}; use crate::ui::terminal::{ input::{InputManager, KeyEventResult}, renderer::ProductionTerminalRenderer, @@ -284,6 +286,11 @@ impl TerminalTuiApp { // Spawn backend handler let backend_task = { let multi_session_manager = multi_session_manager.clone(); + let runtime_options = Arc::new(BackendRuntimeOptions { + record_path: config.record.clone(), + playback_path: config.playback.clone(), + fast_playback: config.fast_playback, + }); let ui = ui.clone(); tokio::spawn(async move { @@ -291,6 +298,7 @@ impl TerminalTuiApp { backend_event_rx, backend_response_tx, multi_session_manager, + runtime_options, ui, ) .await; diff --git a/crates/llm/src/factory.rs b/crates/llm/src/factory.rs index 0023c63a..8d379371 100644 --- a/crates/llm/src/factory.rs +++ b/crates/llm/src/factory.rs @@ -177,12 +177,19 @@ pub async fn create_llm_client_from_model( model_name: &str, playback_path: Option, fast_playback: bool, + record_path: Option, ) -> Result> { let config_system = ConfigurationSystem::load()?; let (model_config, provider_config) = config_system.get_model_with_provider(model_name)?; - create_llm_client_from_configs(model_config, provider_config, playback_path, fast_playback) - .await + create_llm_client_from_configs( + model_config, + provider_config, + playback_path, + fast_playback, + record_path, + ) + .await } /// Create an LLM client from model and provider configurations @@ -191,6 +198,7 @@ pub async fn create_llm_client_from_configs( provider_config: &ProviderConfig, playback_path: Option, fast_playback: bool, + record_path_override: Option, ) -> Result> { // Build optional playback state once let playback_state = if let Some(path) = &playback_path { @@ -223,16 +231,18 @@ pub async fn create_llm_client_from_configs( } }; - // Extract recording path from model config if present - let record_path = model_config - .config - .get("record_path") - .and_then(|v| v.as_str()) - .map(PathBuf::from); + // Extract recording path from model config (allowing runtime override) + let record_path = record_path_override.or_else(|| { + model_config + .config + .get("record_path") + .and_then(|v| v.as_str()) + .map(PathBuf::from) + }); match provider_type { LLMProviderType::AiCore => { - create_ai_core_client(model_config, provider_config, record_path).await + create_ai_core_client(model_config, provider_config, record_path.clone()).await } LLMProviderType::Anthropic => { create_anthropic_client(model_config, provider_config, record_path, playback_state) diff --git a/docs/context-compaction.md b/docs/context-compaction.md index b1da6838..48e47460 100644 --- a/docs/context-compaction.md +++ b/docs/context-compaction.md @@ -39,7 +39,7 @@ This document outlines the phased approach for adding automatic context compacti ─ Worked for 1h 18m 34s ────────────────────────────────────────────────────────────────────────────────────────────────── • - Added automatic compaction support: when prior assistant usage crosses the configured context window threshold, we now inject a summary request, persist the result as ContentBlock::CompactionSummary, and refresh the UI so a collapsible “conversation compacted” banner appears (crates/code_assistant/src/agent/runner.rs#L1239, crates/code_assistant/src/ui/gpui/elements.rs#L1488, crates/code_assistant/src/ui/gpui/mod.rs#L1016). - - Introduced context_token_limit to the model pipeline, wiring it through config loading (crates/llm/src/provider_config.rs#L26), session persistence (crates/code_assistant/src/persistence.rs#L16), manager creation (crates/code_assistant/src/session/manager.rs#L60), and ensuring legacy sessions hydrate the limit via SessionModelConfig::ensure_context_limit. + - Introduced context_token_limit to the model pipeline, loading it from the shared provider configuration whenever the agent needs it (crates/llm/src/provider_config.rs#L26, crates/code_assistant/src/agent/runner.rs#L1239) instead of persisting it per session. - Updated streaming/terminal/ACP surfaces to understand the new compaction summary block so replay, terminal, and ACP conversions all remain consistent (see crates/llm/src/anthropic.rs#L170, crates/code_assistant/src/ui/streaming/json_processor.rs#L311, crates/code_assistant/src/ui/terminal/ui.rs#L420, crates/code_assistant/src/acp/ui.rs#L130). - Documented the rollout as a phased plan in docs/context-compaction.md and added focused unit coverage (crates/code_assistant/src/agent/tests.rs#L780) validating summary insertion and UI refresh. diff --git a/docs/model-selection.md b/docs/model-selection.md index eee558a0..66fcc1f8 100644 --- a/docs/model-selection.md +++ b/docs/model-selection.md @@ -178,7 +178,7 @@ The system currently supports these providers (from `LLMProviderType` enum): **✅ 2.1 Update Session Persistence** - ✅ Modified `crates/code_assistant/src/persistence.rs`: - - ✅ Replaced `LlmSessionConfig` with new `SessionModelConfig` containing only `model_name` and `record_path` + - ✅ Replaced `LlmSessionConfig` with new `SessionModelConfig` that tracks the selected `model_name` - ✅ Removed all old provider-specific fields (provider, base_url, aicore_config, num_ctx) - ✅ Updated session creation/loading to use model-based config - ✅ Maintained backward compatibility for existing session files From 3c09cad708246b43ae482f9459b0ff66646aefa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sun, 26 Oct 2025 17:59:17 +0100 Subject: [PATCH 05/14] Remove model config validation from SessionModelConfig Model existence checks are now performed at the call site, specifically in the backend handler. SessionModelConfig::for_model is replaced with SessionModelConfig::new, which no longer returns a Result. Callers are updated accordingly. --- crates/code_assistant/src/acp/agent.rs | 58 ++------------------ crates/code_assistant/src/app/gpui.rs | 2 +- crates/code_assistant/src/persistence.rs | 15 ++--- crates/code_assistant/src/session/manager.rs | 8 +-- crates/code_assistant/src/ui/backend.rs | 21 ++++--- 5 files changed, 30 insertions(+), 74 deletions(-) diff --git a/crates/code_assistant/src/acp/agent.rs b/crates/code_assistant/src/acp/agent.rs index 94c58f7a..72daa242 100644 --- a/crates/code_assistant/src/acp/agent.rs +++ b/crates/code_assistant/src/acp/agent.rs @@ -323,15 +323,7 @@ impl acp::Agent for ACPAgentImpl { let session_id = { let mut manager = session_manager.lock().await; - let session_model_config = SessionModelConfig::for_model(model_name.clone()) - .map_err(|e| { - tracing::error!( - error = ?e, - "ACP: Failed to load model configuration for {}", - model_name - ); - to_acp_error(&e) - })?; + let session_model_config = SessionModelConfig::new(model_name.clone()); manager .create_session_with_config( None, @@ -353,14 +345,7 @@ impl acp::Agent for ACPAgentImpl { if model_info.selection_changed { let mut manager = session_manager.lock().await; let fallback_model_config = - SessionModelConfig::for_model(model_info.selected_model_name.clone()) - .map_err(|e| { - tracing::error!( - error = ?e, - "ACP: Failed to build fallback model config" - ); - to_acp_error(&e) - })?; + SessionModelConfig::new(model_info.selected_model_name.clone()); if let Err(err) = manager.set_session_model_config(&session_id, Some(fallback_model_config)) { @@ -460,17 +445,8 @@ impl acp::Agent for ACPAgentImpl { ) { if model_info.selection_changed { let mut manager = session_manager.lock().await; - let fallback_model_config = SessionModelConfig::for_model( - model_info.selected_model_name.clone(), - ) - .map_err(|e| { - tracing::error!( - error = ?e, - "ACP: Failed to build fallback model config when loading session {}", - arguments.session_id.0 - ); - to_acp_error(&e) - })?; + let fallback_model_config = + SessionModelConfig::new(model_info.selected_model_name.clone()); if let Err(err) = manager.set_session_model_config( &arguments.session_id.0, Some(fallback_model_config), @@ -559,20 +535,7 @@ impl acp::Agent for ACPAgentImpl { let session_model_config = match config_result { Ok(Some(config)) => config, - Ok(None) => match SessionModelConfig::for_model(model_name.clone()) { - Ok(config) => config, - Err(e) => { - let error_msg = format!( - "Failed to load default model configuration for session {}: {e}", - arguments.session_id.0 - ); - tracing::error!("{}", error_msg); - let mut uis = active_uis.lock().await; - uis.remove(arguments.session_id.0.as_ref()); - let err = e.context(error_msg); - return Err(to_acp_error(&err)); - } - }, + Ok(None) => SessionModelConfig::new(model_name.clone()), Err(e) => { let error_msg = format!( "Failed to load session model configuration for session {}: {e}", @@ -852,16 +815,7 @@ impl acp::Agent for ACPAgentImpl { { let mut manager = session_manager.lock().await; - let new_model_config = match SessionModelConfig::for_model(display_name.clone()) { - Ok(config) => config, - Err(err) => { - tracing::error!( - error = ?err, - "ACP: Failed to build session model configuration" - ); - return Err(to_acp_error(&err)); - } - }; + let new_model_config = SessionModelConfig::new(display_name.clone()); if let Err(err) = manager.set_session_model_config(&session_id.0, Some(new_model_config)) { diff --git a/crates/code_assistant/src/app/gpui.rs b/crates/code_assistant/src/app/gpui.rs index f54a9d1c..91e8b619 100644 --- a/crates/code_assistant/src/app/gpui.rs +++ b/crates/code_assistant/src/app/gpui.rs @@ -29,7 +29,7 @@ pub fn run(config: AgentRunConfig) -> Result<()> { let default_model = config.model.clone(); let base_session_model_config = - crate::persistence::SessionModelConfig::for_model(default_model.clone())?; + crate::persistence::SessionModelConfig::new(default_model.clone()); // Create the new SessionManager let multi_session_manager = Arc::new(Mutex::new(SessionManager::new( diff --git a/crates/code_assistant/src/persistence.rs b/crates/code_assistant/src/persistence.rs index 29e5af6b..838f2961 100644 --- a/crates/code_assistant/src/persistence.rs +++ b/crates/code_assistant/src/persistence.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Result}; +use anyhow::Result; use llm::Message; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -125,18 +125,13 @@ impl ChatSession { } impl SessionModelConfig { - /// Construct a session model configuration by looking up the model metadata. - pub fn for_model(model_name: String) -> Result { - let config_system = llm::provider_config::ConfigurationSystem::load()?; - if config_system.get_model(&model_name).is_none() { - return Err(anyhow!("Model not found in models.json: {model_name}")); - } - - Ok(Self { + /// Construct a session model configuration for the given display name. + pub fn new(model_name: String) -> Self { + Self { model_name, _legacy_record_path: None, _legacy_context_token_limit: None, - }) + } } #[cfg(test)] diff --git a/crates/code_assistant/src/session/manager.rs b/crates/code_assistant/src/session/manager.rs index 7f4adfea..9c91d870 100644 --- a/crates/code_assistant/src/session/manager.rs +++ b/crates/code_assistant/src/session/manager.rs @@ -56,13 +56,13 @@ impl SessionManager { /// Create a new session and return its ID pub fn create_session(&mut self, name: Option) -> Result { // Always create sessions with a default model config - let default_model_config = self.default_model_config()?; + let default_model_config = self.default_model_config(); self.create_session_with_config(name, None, Some(default_model_config)) } /// Get the default model configuration - fn default_model_config(&self) -> Result { - SessionModelConfig::for_model(self.default_model_name.clone()) + fn default_model_config(&self) -> SessionModelConfig { + SessionModelConfig::new(self.default_model_name.clone()) } /// Create a new session with optional model config and return its ID @@ -160,7 +160,7 @@ impl SessionManager { if let Some(model_name) = existing_model_name { model_name } else { - let default_model_config = self.default_model_config()?; + let default_model_config = self.default_model_config(); let model_name = default_model_config.model_name.clone(); { let session_instance = self.active_sessions.get_mut(&session_id).unwrap(); diff --git a/crates/code_assistant/src/ui/backend.rs b/crates/code_assistant/src/ui/backend.rs index 23504d5b..25626c40 100644 --- a/crates/code_assistant/src/ui/backend.rs +++ b/crates/code_assistant/src/ui/backend.rs @@ -4,6 +4,7 @@ use crate::session::SessionManager; use crate::ui::UserInterface; use crate::utils::{content::content_blocks_from, DefaultCommandExecutor}; use llm::factory::create_llm_client_from_model; +use llm::provider_config::ConfigurationSystem; use std::path::PathBuf; use std::sync::Arc; @@ -465,20 +466,26 @@ async fn handle_switch_model( session_id, model_name ); - // Create new session model config - let new_model_config = match SessionModelConfig::for_model(model_name.to_string()) { - Ok(config) => config, + // Validate the requested model exists + let config_system = match ConfigurationSystem::load() { + Ok(system) => system, Err(e) => { - error!( - "Failed to load model configuration for {}: {}", - model_name, e - ); + error!("Failed to load model configuration: {}", e); return BackendResponse::Error { message: format!("Failed to load model configuration: {e}"), }; } }; + if config_system.get_model(model_name).is_none() { + error!("Model '{}' not found in configuration", model_name); + return BackendResponse::Error { + message: format!("Model '{model_name}' not found in configuration."), + }; + } + + let new_model_config = SessionModelConfig::new(model_name.to_string()); + let result = { let mut manager = multi_session_manager.lock().await; manager.set_session_model_config(session_id, Some(new_model_config)) From 51176e2f67220dde6f7eec559c2dce4cf0ebb099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sun, 26 Oct 2025 18:19:54 +0100 Subject: [PATCH 06/14] Drop forced UI update after compaction --- crates/code_assistant/src/agent/runner.rs | 119 +--------------------- crates/code_assistant/src/agent/tests.rs | 29 ++---- 2 files changed, 7 insertions(+), 141 deletions(-) diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index a425c1c6..77a04f64 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -6,10 +6,7 @@ use crate::session::SessionConfig; use crate::tools::core::{ResourcesTracker, ToolContext, ToolRegistry, ToolScope}; use crate::tools::{generate_system_message, ParserRegistry, ToolRequest}; use crate::types::*; -use crate::ui::gpui::elements::MessageRole as UiMessageRole; -use crate::ui::streaming::create_stream_processor; -use crate::ui::ui_events::{MessageData, ToolResultData}; -use crate::ui::{ToolStatus, UiEvent, UserInterface}; +use crate::ui::{UiEvent, UserInterface}; use crate::utils::CommandExecutor; use anyhow::Result; use llm::{ @@ -1107,118 +1104,6 @@ impl Agent { } } - fn build_ui_messages(&self) -> Result> { - struct DummyUI; - - #[async_trait::async_trait] - impl crate::ui::UserInterface for DummyUI { - async fn send_event( - &self, - _event: crate::ui::UiEvent, - ) -> Result<(), crate::ui::UIError> { - Ok(()) - } - - fn display_fragment( - &self, - _fragment: &crate::ui::DisplayFragment, - ) -> Result<(), crate::ui::UIError> { - Ok(()) - } - - fn should_streaming_continue(&self) -> bool { - true - } - - fn notify_rate_limit(&self, _seconds_remaining: u64) {} - - fn clear_rate_limit(&self) {} - - fn as_any(&self) -> &dyn std::any::Any { - self - } - } - - let dummy_ui: Arc = Arc::new(DummyUI); - let mut processor = create_stream_processor(self.session_config.tool_syntax, dummy_ui, 0); - - let mut messages_data = Vec::new(); - - for message in &self.message_history { - if message.role == MessageRole::User { - match &message.content { - MessageContent::Text(text) if text.trim().is_empty() => { - continue; - } - MessageContent::Structured(blocks) => { - let has_tool_results = blocks - .iter() - .any(|block| matches!(block, ContentBlock::ToolResult { .. })); - if has_tool_results { - continue; - } - } - _ => {} - } - } - - let fragments = processor.extract_fragments_from_message(message)?; - let role = match message.role { - MessageRole::User => UiMessageRole::User, - MessageRole::Assistant => UiMessageRole::Assistant, - }; - - messages_data.push(MessageData { role, fragments }); - } - - Ok(messages_data) - } - - fn build_ui_tool_results(&self) -> Result> { - let mut tool_results = Vec::new(); - let mut resources_tracker = ResourcesTracker::new(); - - for execution in &self.tool_executions { - let success = execution.result.is_success(); - let status = if success { - ToolStatus::Success - } else { - ToolStatus::Error - }; - - let short_output = execution.result.as_render().status(); - let output = execution.result.as_render().render(&mut resources_tracker); - - tool_results.push(ToolResultData { - tool_id: execution.tool_request.id.clone(), - status, - message: Some(short_output), - output: Some(output), - }); - } - - Ok(tool_results) - } - - async fn refresh_ui_after_compaction(&self) -> Result<()> { - if self.session_id.is_none() { - return Ok(()); - } - - let messages = self.build_ui_messages()?; - let tool_results = self.build_ui_tool_results()?; - - self.ui - .send_event(UiEvent::SetMessages { - messages, - session_id: self.session_id.clone(), - tool_results, - }) - .await?; - - Ok(()) - } - #[cfg(test)] pub fn set_test_session_metadata( &mut self, @@ -1320,8 +1205,6 @@ impl Agent { }; self.append_message(summary_message)?; - self.refresh_ui_after_compaction().await?; - Ok(()) } diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index 29b3bdca..b0aeb98f 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -9,7 +9,6 @@ use crate::tests::mocks::{ }; use crate::tests::utils::parse_and_truncate_llm_response; use crate::types::*; -use crate::ui::UiEvent; use anyhow::Result; use llm::types::*; use std::path::PathBuf; @@ -837,28 +836,12 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { "Expected compaction prompt in LLM request" ); - // Ensure the UI received a SetMessages event with the compaction divider - let events = ui.events(); - let set_messages_event = events.iter().find_map(|event| { - if let UiEvent::SetMessages { messages, .. } = event { - Some(messages.clone()) - } else { - None - } - }); - let messages = set_messages_event.expect("Expected SetMessages event after compaction"); - let has_compaction_fragment = messages.iter().any(|message| { - message.fragments.iter().any(|fragment| { - matches!( - fragment, - crate::ui::DisplayFragment::CompactionDivider { .. } - ) - }) - }); - assert!( - has_compaction_fragment, - "Expected compaction divider fragment in UI messages" - ); + // Ensure the UI streaming output received the compaction divider fragment + let streaming_output = ui.get_streaming_output(); + let has_compaction_fragment = streaming_output + .iter() + .any(|chunk| chunk.starts_with("[compaction] ")); + assert!(has_compaction_fragment, "Expected streamed compaction divider fragment"); // Subsequent prompt should include the summary content let summary_in_followup = From 77115aacdbafc8cc377630f50aa27b93dba3bd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sun, 26 Oct 2025 20:04:02 +0100 Subject: [PATCH 07/14] Use Default::default for Message construction in tests and code --- crates/code_assistant/src/agent/runner.rs | 23 ++++---- crates/code_assistant/src/agent/tests.rs | 42 +++++++-------- crates/code_assistant/src/session/manager.rs | 3 +- .../src/tests/integration_tests.rs | 3 +- .../src/ui/streaming/caret_processor_tests.rs | 3 +- .../src/ui/streaming/json_processor.rs | 2 +- .../src/ui/streaming/json_processor_tests.rs | 9 ++-- .../src/ui/streaming/xml_processor_tests.rs | 9 ++-- crates/llm/src/anthropic.rs | 54 +++++++------------ crates/llm/src/openai_responses.rs | 21 +++----- crates/llm/src/tests.rs | 21 +++----- crates/llm/src/types.rs | 15 ++++++ 12 files changed, 87 insertions(+), 118 deletions(-) diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index 77a04f64..a1c317e7 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -304,8 +304,7 @@ impl Agent { let user_msg = Message { role: MessageRole::User, content: MessageContent::Text(pending_message.clone()), - request_id: None, - usage: None, + ..Default::default() }; self.append_message(user_msg)?; @@ -335,6 +334,7 @@ impl Agent { content: MessageContent::Structured(llm_response.content.clone()), request_id: Some(request_id), usage: Some(llm_response.usage.clone()), + ..Default::default() })?; } @@ -547,8 +547,7 @@ impl Agent { Message { role: MessageRole::User, content: MessageContent::Text(error_text), - request_id: None, - usage: None, + ..Default::default() } } _ => { @@ -570,8 +569,7 @@ impl Agent { start_time: Some(SystemTime::now()), end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() } } }; @@ -622,8 +620,7 @@ impl Agent { let result_message = Message { role: MessageRole::User, content: MessageContent::Structured(content_blocks), - request_id: None, - usage: None, + ..Default::default() }; self.append_message(result_message)?; } @@ -649,8 +646,7 @@ impl Agent { let user_msg = Message { role: MessageRole::User, content: MessageContent::Text(task.clone()), - request_id: None, - usage: None, + ..Default::default() }; self.append_message(user_msg)?; @@ -847,6 +843,7 @@ impl Agent { content: MessageContent::Text(text_content.trim().to_string()), request_id: msg.request_id, usage: msg.usage.clone(), + ..Default::default() } } // For non-structured content, keep as is @@ -952,6 +949,7 @@ impl Agent { // Log messages for debugging /* for (i, message) in request.messages.iter().enumerate() { + debug!("Message {}:", i); debug!("Message {}:", i); // Using the Display trait implementation for Message let formatted_message = format!("{message}"); @@ -1187,8 +1185,7 @@ impl Agent { let compaction_message = Message { role: MessageRole::User, content: MessageContent::Text(CONTEXT_COMPACTION_PROMPT.to_string()), - request_id: None, - usage: None, + ..Default::default() }; self.append_message(compaction_message)?; @@ -1202,6 +1199,7 @@ impl Agent { content: MessageContent::Structured(vec![summary_block]), request_id: Some(request_id), usage: Some(response.usage.clone()), + ..Default::default() }; self.append_message(summary_message)?; @@ -1292,6 +1290,7 @@ impl Agent { content: MessageContent::Structured(new_blocks), request_id: msg.request_id, usage: msg.usage.clone(), + ..Default::default() }; messages.push(new_msg); } else { diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index b0aeb98f..feb03bda 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -524,8 +524,7 @@ async fn test_invalid_xml_tool_error_handling() -> Result<()> { let user_msg = Message { role: MessageRole::User, content: MessageContent::Text("Test task".to_string()), - request_id: None, - usage: None, + ..Default::default() }; agent.append_message(user_msg)?; @@ -788,8 +787,7 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { agent.append_message(Message { role: MessageRole::User, content: MessageContent::Text("User request".to_string()), - request_id: None, - usage: None, + ..Default::default() })?; agent.append_message(Message { @@ -804,6 +802,7 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { cache_creation_input_tokens: 0, cache_read_input_tokens: 0, }), + ..Default::default() })?; agent.run_single_iteration().await?; @@ -841,7 +840,10 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { let has_compaction_fragment = streaming_output .iter() .any(|chunk| chunk.starts_with("[compaction] ")); - assert!(has_compaction_fragment, "Expected streamed compaction divider fragment"); + assert!( + has_compaction_fragment, + "Expected streamed compaction divider fragment" + ); // Subsequent prompt should include the summary content let summary_in_followup = @@ -883,15 +885,14 @@ fn test_ui_filtering_with_failed_tool_messages() -> Result<()> { Message { role: MessageRole::User, content: MessageContent::Text("Hello, please help me".to_string()), - request_id: None, - usage: None, + ..Default::default() }, // Assistant response Message { role: MessageRole::Assistant, content: MessageContent::Text("I'll help you".to_string()), request_id: Some(1), - usage: None, + ..Default::default() }, // Parse error message in XML mode - should be filtered out Message { @@ -900,8 +901,7 @@ fn test_ui_filtering_with_failed_tool_messages() -> Result<()> { "tool-1-0", "Tool error: Unknown tool 'invalid_tool'. Please use only available tools.", )]), - request_id: None, - usage: None, + ..Default::default() }, // Regular tool result - should be filtered out Message { @@ -910,22 +910,19 @@ fn test_ui_filtering_with_failed_tool_messages() -> Result<()> { "regular-tool-123", "File contents here", )]), - request_id: None, - usage: None, + ..Default::default() }, // Empty user message (legacy) - should be filtered out Message { role: MessageRole::User, content: MessageContent::Text("".to_string()), - request_id: None, - usage: None, + ..Default::default() }, // Another regular user message - should be included Message { role: MessageRole::User, content: MessageContent::Text("Thank you for the help!".to_string()), - request_id: None, - usage: None, + ..Default::default() }, ]; session.tool_executions = Vec::new(); @@ -1279,8 +1276,7 @@ fn test_inject_naming_reminder_skips_tool_result_messages() -> Result<()> { let messages = vec![Message { role: MessageRole::User, content: MessageContent::Text("Hello, help me with a task".to_string()), - request_id: None, - usage: None, + ..Default::default() }]; let result_messages = agent.inject_naming_reminder_if_needed(messages.clone()); @@ -1313,8 +1309,7 @@ fn test_inject_naming_reminder_skips_tool_result_messages() -> Result<()> { Message { role: MessageRole::User, content: MessageContent::Text("Hello, help me with a task".to_string()), - request_id: None, - usage: None, + ..Default::default() }, Message { role: MessageRole::Assistant, @@ -1323,6 +1318,7 @@ fn test_inject_naming_reminder_skips_tool_result_messages() -> Result<()> { )]), request_id: Some(1), usage: Some(Usage::zero()), + ..Default::default() }, Message { role: MessageRole::User, @@ -1330,8 +1326,7 @@ fn test_inject_naming_reminder_skips_tool_result_messages() -> Result<()> { "tool-1-1", "Tool execution result", )]), - request_id: None, - usage: None, + ..Default::default() }, ]; @@ -1383,8 +1378,7 @@ fn test_inject_naming_reminder_skips_tool_result_messages() -> Result<()> { ContentBlock::new_text("Please analyze this file"), ContentBlock::new_tool_result("tool-1-1", "Previous tool result"), ]), - request_id: None, - usage: None, + ..Default::default() }]; let result_messages = agent.inject_naming_reminder_if_needed(mixed_message.clone()); diff --git a/crates/code_assistant/src/session/manager.rs b/crates/code_assistant/src/session/manager.rs index 9c91d870..d19b5c5b 100644 --- a/crates/code_assistant/src/session/manager.rs +++ b/crates/code_assistant/src/session/manager.rs @@ -221,8 +221,7 @@ impl SessionManager { let user_msg = Message { role: llm::MessageRole::User, content: llm::MessageContent::Structured(content_blocks), - request_id: None, - usage: None, + ..Default::default() }; session_instance.add_message(user_msg); diff --git a/crates/code_assistant/src/tests/integration_tests.rs b/crates/code_assistant/src/tests/integration_tests.rs index 36f624d8..49ed8d5e 100644 --- a/crates/code_assistant/src/tests/integration_tests.rs +++ b/crates/code_assistant/src/tests/integration_tests.rs @@ -179,8 +179,7 @@ async fn test_tool_limit_with_realistic_anthropic_chunks() -> Result<()> { messages: vec![Message { role: MessageRole::User, content: MessageContent::Text("Please refactor the Anthropic client".to_string()), - request_id: None, - usage: None, + ..Default::default() }], system_prompt: "You are a helpful assistant.".to_string(), ..Default::default() diff --git a/crates/code_assistant/src/ui/streaming/caret_processor_tests.rs b/crates/code_assistant/src/ui/streaming/caret_processor_tests.rs index 47b389f3..ad99b8bb 100644 --- a/crates/code_assistant/src/ui/streaming/caret_processor_tests.rs +++ b/crates/code_assistant/src/ui/streaming/caret_processor_tests.rs @@ -212,8 +212,7 @@ async fn test_extract_fragments_from_complete_message() { content: MessageContent::Text( "I'll create the file for you.\n\n^^^list_projects\n^^^".to_string(), ), - request_id: None, - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); diff --git a/crates/code_assistant/src/ui/streaming/json_processor.rs b/crates/code_assistant/src/ui/streaming/json_processor.rs index 110eaed0..0d35b31a 100644 --- a/crates/code_assistant/src/ui/streaming/json_processor.rs +++ b/crates/code_assistant/src/ui/streaming/json_processor.rs @@ -1163,7 +1163,7 @@ mod tests { role: MessageRole::Assistant, content: MessageContent::Structured(vec![redacted_thinking]), request_id: Some(1), - usage: None, + ..Default::default() }; // Extract fragments from the complete message (as would happen when loading a session) diff --git a/crates/code_assistant/src/ui/streaming/json_processor_tests.rs b/crates/code_assistant/src/ui/streaming/json_processor_tests.rs index 08f890d1..3e8c9988 100644 --- a/crates/code_assistant/src/ui/streaming/json_processor_tests.rs +++ b/crates/code_assistant/src/ui/streaming/json_processor_tests.rs @@ -1063,8 +1063,7 @@ mod tests { "Let me analyze this. This is complex. Here's my answer." .to_string(), ), - request_id: None, - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); @@ -1097,8 +1096,7 @@ mod tests { llm::ContentBlock::new_text("I'll read the file for you."), llm::ContentBlock::new_tool_use("tool_123", "read_files", tool_input), ]), - request_id: None, - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); @@ -1152,8 +1150,7 @@ mod tests { ), llm::ContentBlock::new_text("File has been written."), ]), - request_id: None, - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); diff --git a/crates/code_assistant/src/ui/streaming/xml_processor_tests.rs b/crates/code_assistant/src/ui/streaming/xml_processor_tests.rs index da1ffeb0..fd6d6fcc 100644 --- a/crates/code_assistant/src/ui/streaming/xml_processor_tests.rs +++ b/crates/code_assistant/src/ui/streaming/xml_processor_tests.rs @@ -271,7 +271,7 @@ mod tests { "I'll help you. Let me plan this. Here's what I'll do: main.rs".to_string() ), request_id: Some(1u64), - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); @@ -317,7 +317,7 @@ mod tests { llm::ContentBlock::new_tool_use("search_456", "search_files", tool_input), ]), request_id: Some(1u64), - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); @@ -368,7 +368,7 @@ mod tests { ), ]), request_id: Some(1u64), - usage: None, + ..Default::default() }; let fragments = processor.extract_fragments_from_message(&message).unwrap(); @@ -457,8 +457,7 @@ mod tests { content: MessageContent::Text( "Please use to read test.txt and show me what should I do".to_string() ), - request_id: None, - usage: None, + ..Default::default() }; let fragments = processor diff --git a/crates/llm/src/anthropic.rs b/crates/llm/src/anthropic.rs index 292e3467..e61ae826 100644 --- a/crates/llm/src/anthropic.rs +++ b/crates/llm/src/anthropic.rs @@ -1405,8 +1405,7 @@ mod tests { Message { role, content: MessageContent::Text(text.to_string()), - request_id: None, - usage: None, + ..Default::default() } } @@ -1523,8 +1522,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() } } @@ -1539,8 +1537,7 @@ mod tests { start_time: None, end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() } } @@ -1550,8 +1547,7 @@ mod tests { messages.push(Message { role: MessageRole::User, content: MessageContent::Text(format!("Request {i}")), - request_id: None, - usage: None, + ..Default::default() }); messages.push(create_tool_message(&format!("tool_{i}"))); messages.push(create_tool_result( @@ -1605,8 +1601,7 @@ mod tests { .map(|i| Message { role: MessageRole::User, content: MessageContent::Text(format!("Prelude {i}")), - request_id: None, - usage: None, + ..Default::default() }) .collect(); @@ -1625,8 +1620,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }); let result = converter.convert_messages_with_cache(messages); @@ -1662,8 +1656,7 @@ mod tests { MessageRole::Assistant }, content: MessageContent::Text(format!("Message A{i}")), - request_id: None, - usage: None, + ..Default::default() }) .collect(); @@ -1675,8 +1668,7 @@ mod tests { MessageRole::Assistant }, content: MessageContent::Text(format!("Completely different B{i}")), - request_id: None, - usage: None, + ..Default::default() }) .collect(); @@ -1726,8 +1718,7 @@ mod tests { .map(|i| Message { role: MessageRole::User, content: MessageContent::Text(format!("Short {i}")), - request_id: None, - usage: None, + ..Default::default() }) .collect(); @@ -1753,8 +1744,7 @@ mod tests { messages.push(Message { role: MessageRole::User, content: MessageContent::Text("Help me analyze this data".to_string()), - request_id: None, - usage: None, + ..Default::default() }); // Assistant response with tool use @@ -1774,8 +1764,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }); // Tool result @@ -1788,8 +1777,7 @@ mod tests { start_time: None, end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() }); // Continue building conversation to 20+ messages @@ -1797,8 +1785,7 @@ mod tests { messages.push(Message { role: MessageRole::User, content: MessageContent::Text(format!("Follow up question {i}")), - request_id: None, - usage: None, + ..Default::default() }); messages.push(Message { @@ -1817,8 +1804,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }); messages.push(Message { @@ -1830,8 +1816,7 @@ mod tests { start_time: None, end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() }); } @@ -1970,8 +1955,7 @@ mod tests { error_messages.push(Message { role: MessageRole::User, content: MessageContent::Text("Try something that might fail".to_string()), - request_id: None, - usage: None, + ..Default::default() }); error_messages.push(Message { @@ -1990,8 +1974,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }); error_messages.push(Message { @@ -2003,8 +1986,7 @@ mod tests { start_time: None, end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() }); // Convert with error scenario diff --git a/crates/llm/src/openai_responses.rs b/crates/llm/src/openai_responses.rs index 4175b09d..8105cad6 100644 --- a/crates/llm/src/openai_responses.rs +++ b/crates/llm/src/openai_responses.rs @@ -1295,8 +1295,7 @@ mod tests { let messages = vec![Message { role: MessageRole::User, content: MessageContent::Text("Hello".to_string()), - request_id: None, - usage: None, + ..Default::default() }]; let converted = client.convert_messages(messages); @@ -1334,8 +1333,7 @@ mod tests { start_time: None, end_time: None, }]), - request_id: None, - usage: None, + ..Default::default() }]; let converted = client.convert_messages(messages); @@ -1456,8 +1454,7 @@ mod tests { Message { role: MessageRole::User, content: MessageContent::Text("What's 2+2?".to_string()), - request_id: None, - usage: None, + ..Default::default() }, Message { role: MessageRole::Assistant, @@ -1477,14 +1474,12 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }, Message { role: MessageRole::User, content: MessageContent::Text("What about 3+3?".to_string()), - request_id: None, - usage: None, + ..Default::default() }, ]; @@ -1593,8 +1588,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }]; let converted = client.convert_messages(messages); @@ -1689,8 +1683,7 @@ mod tests { end_time: None, }, ]), - request_id: None, - usage: None, + ..Default::default() }]; let converted = client.convert_messages(messages); diff --git a/crates/llm/src/tests.rs b/crates/llm/src/tests.rs index aaf4086d..9943472b 100644 --- a/crates/llm/src/tests.rs +++ b/crates/llm/src/tests.rs @@ -31,8 +31,7 @@ impl TestCase { messages: vec![Message { role: MessageRole::User, content: MessageContent::Text("Hello".to_string()), - request_id: None, - usage: None, + ..Default::default() }], system_prompt: "You are a helpful assistant.".to_string(), ..Default::default() @@ -68,8 +67,7 @@ impl TestCase { messages: vec![Message { role: MessageRole::User, content: MessageContent::Text("What's the weather?".to_string()), - request_id: None, - usage: None, + ..Default::default() }], system_prompt: "Use the weather tool.".to_string(), tools: Some(vec![ToolDefinition { @@ -823,8 +821,7 @@ async fn test_anthropic_rate_limit_retry() -> Result<()> { messages: vec![Message { role: MessageRole::User, content: MessageContent::Text("Hello".to_string()), - request_id: None, - usage: None, + ..Default::default() }], system_prompt: "You are a helpful assistant.".to_string(), ..Default::default() @@ -886,8 +883,7 @@ async fn test_openai_message_conversion() -> Result<()> { let text_message = Message { role: MessageRole::User, content: MessageContent::Text("Hello world".to_string()), - request_id: None, - usage: None, + ..Default::default() }; let openai_messages = OpenAIClient::convert_message(&text_message); @@ -906,8 +902,7 @@ async fn test_openai_message_conversion() -> Result<()> { ContentBlock::new_text("What do you see in this image?"), ContentBlock::new_image_base64("image/png", image_data), ]), - request_id: None, - usage: None, + ..Default::default() }; let openai_messages = OpenAIClient::convert_message(&mixed_message); @@ -947,8 +942,7 @@ async fn test_openai_message_conversion() -> Result<()> { serde_json::json!({"location": "Berlin"}), ), ]), - request_id: None, - usage: None, + ..Default::default() }; let openai_messages = OpenAIClient::convert_message(&assistant_message); @@ -972,8 +966,7 @@ async fn test_openai_message_conversion() -> Result<()> { ContentBlock::new_tool_result("tool_123", "Weather is sunny, 25°C"), ContentBlock::new_text("What should I wear?"), ]), - request_id: None, - usage: None, + ..Default::default() }; let openai_messages = OpenAIClient::convert_message(&user_with_tool_result); diff --git a/crates/llm/src/types.rs b/crates/llm/src/types.rs index 4996b30c..da4afbb3 100644 --- a/crates/llm/src/types.rs +++ b/crates/llm/src/types.rs @@ -79,6 +79,21 @@ pub struct Message { /// Token usage for assistant messages (tracks context size and costs) #[serde(skip_serializing_if = "Option::is_none")] pub usage: Option, + /// Indicates this message is a compaction summary divider + #[serde(default)] + pub is_compaction_summary: bool, +} + +impl Default for Message { + fn default() -> Self { + Self { + role: MessageRole::User, + content: MessageContent::Text(String::new()), + usage: None, + request_id: None, + is_compaction_summary: false, + } + } } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] From e3116c7c7dd25af26d1d14e1bbfb2cb180f29059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sun, 26 Oct 2025 20:49:21 +0100 Subject: [PATCH 08/14] Remove ContentBlock::CompactionSummary and use is_compaction_summary flag Switch compaction summaries to user messages tagged with is_compaction_summary instead of a dedicated ContentBlock variant. Update message filtering, UI rendering, and tests to use the new flag. Remove all code handling CompactionSummary blocks. Update documentation to reflect the new approach. --- crates/code_assistant/src/agent/runner.rs | 153 ++++++++++-------- crates/code_assistant/src/agent/tests.rs | 32 ++-- crates/code_assistant/src/session/instance.rs | 21 +++ .../src/ui/streaming/caret_processor.rs | 5 - .../src/ui/streaming/json_processor.rs | 5 - .../src/ui/streaming/xml_processor.rs | 5 - crates/llm/src/anthropic.rs | 16 +- crates/llm/src/display.rs | 3 - crates/llm/src/openai_responses.rs | 8 - crates/llm/src/types.rs | 27 +--- docs/context-compaction.md | 8 +- 11 files changed, 125 insertions(+), 158 deletions(-) diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index a1c317e7..fb9ffb22 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -6,7 +6,7 @@ use crate::session::SessionConfig; use crate::tools::core::{ResourcesTracker, ToolContext, ToolRegistry, ToolScope}; use crate::tools::{generate_system_message, ParserRegistry, ToolRequest}; use crate::types::*; -use crate::ui::{UiEvent, UserInterface}; +use crate::ui::{DisplayFragment, UiEvent, UserInterface}; use crate::utils::CommandExecutor; use anyhow::Result; use llm::{ @@ -796,15 +796,12 @@ impl Agent { .map(|msg| { match &msg.content { MessageContent::Structured(blocks) => { - // Check if there are any ToolResult or CompactionSummary blocks that need conversion + // Check if there are any ToolResult blocks that need conversion let has_tool_results = blocks .iter() .any(|block| matches!(block, ContentBlock::ToolResult { .. })); - let has_compaction_summary = blocks - .iter() - .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })); - if !has_tool_results && !has_compaction_summary { + if !has_tool_results { // No conversion needed return msg; } @@ -827,12 +824,6 @@ impl Agent { text_content.push_str(text); text_content.push_str("\n\n"); } - ContentBlock::CompactionSummary { text, .. } => { - let formatted = - Self::format_compaction_summary_for_prompt(text); - text_content.push_str(&formatted); - text_content.push_str("\n\n"); - } _ => {} // Ignore other block types } } @@ -1063,13 +1054,47 @@ impl Agent { Ok((response, request_id)) } - fn message_contains_compaction_summary(message: &Message) -> bool { - match &message.content { - MessageContent::Structured(blocks) => blocks - .iter() - .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })), - _ => false, - } + async fn get_non_streaming_response( + &mut self, + messages: Vec, + ) -> Result<(llm::LLMResponse, u64)> { + let request_id = self.next_request_id; + self.next_request_id += 1; + + let messages_with_reminder = self.inject_naming_reminder_if_needed(messages); + + let converted_messages = match self.tool_syntax() { + ToolSyntax::Native => messages_with_reminder, + _ => self.convert_tool_results_to_text(messages_with_reminder), + }; + + let request = LLMRequest { + messages: converted_messages, + system_prompt: self.get_system_prompt(), + tools: match self.tool_syntax() { + ToolSyntax::Native => { + Some(crate::tools::AnnotatedToolDefinition::to_tool_definitions( + ToolRegistry::global().get_tool_definitions_for_scope(self.tool_scope), + )) + } + ToolSyntax::Xml => None, + ToolSyntax::Caret => None, + }, + stop_sequences: None, + request_id, + session_id: self.session_id.clone().unwrap_or_default(), + }; + + let response = self.llm_provider.send_message(request, None).await?; + + debug!( + "Compaction response usage — Input: {}, Output: {}, Cache Read: {}", + response.usage.input_tokens, + response.usage.output_tokens, + response.usage.cache_read_input_tokens + ); + + Ok((response, request_id)) } fn format_compaction_summary_for_prompt(summary: &str) -> String { @@ -1085,7 +1110,6 @@ impl Agent { let mut collected = Vec::new(); for block in blocks { match block { - ContentBlock::CompactionSummary { text, .. } => collected.push(text.as_str()), ContentBlock::Text { text, .. } => collected.push(text.as_str()), ContentBlock::Thinking { thinking, .. } => { collected.push(thinking.as_str()); @@ -1102,6 +1126,18 @@ impl Agent { } } + fn active_messages(&self) -> &[Message] { + if self.message_history.is_empty() { + return &[]; + } + let start = self + .message_history + .iter() + .rposition(|message| message.is_compaction_summary) + .unwrap_or(0); + &self.message_history[start..] + } + #[cfg(test)] pub fn set_test_session_metadata( &mut self, @@ -1142,23 +1178,15 @@ impl Agent { return Ok(None); } - let last_assistant = self - .message_history - .iter() - .rev() - .find(|msg| matches!(msg.role, MessageRole::Assistant)); - - if let Some(message) = last_assistant { - if Self::message_contains_compaction_summary(message) { - return Ok(None); + for message in self.active_messages().iter().rev() { + if !matches!(message.role, MessageRole::Assistant) { + continue; } - if let Some(usage) = &message.usage { let used_tokens = usage.input_tokens + usage.cache_read_input_tokens; - if used_tokens == 0 { - return Ok(None); + if used_tokens > 0 { + return Ok(Some(used_tokens as f32 / limit as f32)); } - return Ok(Some(used_tokens as f32 / limit as f32)); } } @@ -1190,19 +1218,22 @@ impl Agent { self.append_message(compaction_message)?; let messages = self.render_tool_results_in_messages(); - let (response, request_id) = self.get_next_assistant_message(messages).await?; + let (response, _) = self.get_non_streaming_response(messages).await?; let summary_text = Self::extract_compaction_summary_text(&response.content); - let summary_block = ContentBlock::new_compaction_summary(summary_text.clone()); let summary_message = Message { - role: MessageRole::Assistant, - content: MessageContent::Structured(vec![summary_block]), - request_id: Some(request_id), - usage: Some(response.usage.clone()), + role: MessageRole::User, + content: MessageContent::Text(summary_text.clone()), + is_compaction_summary: true, ..Default::default() }; self.append_message(summary_message)?; + let divider = DisplayFragment::CompactionDivider { + summary: summary_text.trim().to_string(), + }; + self.ui.display_fragment(÷r)?; + Ok(()) } @@ -1225,14 +1256,8 @@ impl Agent { tool_outputs.insert(tool_use_id.clone(), rendered_output); } - let start_index = self - .message_history - .iter() - .rposition(Self::message_contains_compaction_summary) - .unwrap_or(0); - // Now rebuild the message history, replacing tool outputs with our dynamically rendered versions - for msg in self.message_history.iter().skip(start_index) { + for msg in self.active_messages() { match &msg.content { MessageContent::Structured(blocks) => { // Look for ToolResult blocks @@ -1264,18 +1289,6 @@ impl Agent { new_blocks.push(block.clone()); } } - ContentBlock::CompactionSummary { - text, - start_time, - end_time, - } => { - new_blocks.push(ContentBlock::Text { - text: Self::format_compaction_summary_for_prompt(text), - start_time: *start_time, - end_time: *end_time, - }); - need_update = true; - } _ => { // Keep other blocks as is new_blocks.push(block.clone()); @@ -1284,23 +1297,23 @@ impl Agent { } if need_update { - // Create a new message with updated blocks - let new_msg = Message { - role: msg.role.clone(), - content: MessageContent::Structured(new_blocks), - request_id: msg.request_id, - usage: msg.usage.clone(), - ..Default::default() - }; - messages.push(new_msg); + let mut updated = msg.clone(); + updated.content = MessageContent::Structured(new_blocks); + messages.push(updated); } else { // No changes needed, use original message messages.push(msg.clone()); } } - _ => { - // For non-tool messages, just copy them as is - messages.push(msg.clone()); + MessageContent::Text(text) => { + if msg.is_compaction_summary { + let mut updated = msg.clone(); + updated.content = + MessageContent::Text(Self::format_compaction_summary_for_prompt(text)); + messages.push(updated); + } else { + messages.push(msg.clone()); + } } } } diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index feb03bda..03b4eda2 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -808,20 +808,18 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { agent.run_single_iteration().await?; // Ensure a compaction summary message was added - let summary_message = - agent - .message_history_for_tests() - .iter() - .find(|message| match &message.content { - MessageContent::Structured(blocks) => blocks - .iter() - .any(|block| matches!(block, ContentBlock::CompactionSummary { .. })), - _ => false, - }); - assert!( - summary_message.is_some(), - "Expected compaction summary in history" - ); + let summary_message = agent + .message_history_for_tests() + .iter() + .find(|message| message.is_compaction_summary) + .cloned() + .expect("Expected compaction summary in history"); + assert_eq!(summary_message.role, MessageRole::User); + let stored_summary = match summary_message.content { + MessageContent::Text(ref text) => text, + MessageContent::Structured(_) => panic!("Summary should be stored as text"), + }; + assert_eq!(stored_summary, summary_text); // The compaction prompt should have been sent to the provider let requests = mock_llm_ref.get_requests(); @@ -835,14 +833,14 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { "Expected compaction prompt in LLM request" ); - // Ensure the UI streaming output received the compaction divider fragment + // Ensure the UI received a SetMessages event with the compaction divider let streaming_output = ui.get_streaming_output(); let has_compaction_fragment = streaming_output .iter() - .any(|chunk| chunk.starts_with("[compaction] ")); + .any(|chunk| chunk.starts_with("[compaction] ") && chunk.contains(summary_text)); assert!( has_compaction_fragment, - "Expected streamed compaction divider fragment" + "Expected compaction divider fragment with summary text" ); // Subsequent prompt should include the summary content diff --git a/crates/code_assistant/src/session/instance.rs b/crates/code_assistant/src/session/instance.rs index 569c3a38..e60e1c85 100644 --- a/crates/code_assistant/src/session/instance.rs +++ b/crates/code_assistant/src/session/instance.rs @@ -301,6 +301,27 @@ impl SessionInstance { ); for message in &self.session.messages { + if message.is_compaction_summary { + let summary = match &message.content { + llm::MessageContent::Text(text) => text.trim().to_string(), + llm::MessageContent::Structured(blocks) => blocks + .iter() + .filter_map(|block| match block { + llm::ContentBlock::Text { text, .. } => Some(text.as_str()), + llm::ContentBlock::Thinking { thinking, .. } => Some(thinking.as_str()), + _ => None, + }) + .collect::>() + .join("\n") + .trim() + .to_string(), + }; + messages_data.push(MessageData { + role: MessageRole::User, + fragments: vec![crate::ui::DisplayFragment::CompactionDivider { summary }], + }); + continue; + } // Filter out tool-result user messages (they have tool IDs in structured content) if message.role == llm::MessageRole::User { match &message.content { diff --git a/crates/code_assistant/src/ui/streaming/caret_processor.rs b/crates/code_assistant/src/ui/streaming/caret_processor.rs index b9a6f415..507d2980 100644 --- a/crates/code_assistant/src/ui/streaming/caret_processor.rs +++ b/crates/code_assistant/src/ui/streaming/caret_processor.rs @@ -236,11 +236,6 @@ impl StreamProcessorTrait for CaretStreamProcessor { data: data.clone(), }); } - llm::ContentBlock::CompactionSummary { text, .. } => { - fragments.push(DisplayFragment::CompactionDivider { - summary: text.clone(), - }); - } } } } diff --git a/crates/code_assistant/src/ui/streaming/json_processor.rs b/crates/code_assistant/src/ui/streaming/json_processor.rs index 0d35b31a..4750314f 100644 --- a/crates/code_assistant/src/ui/streaming/json_processor.rs +++ b/crates/code_assistant/src/ui/streaming/json_processor.rs @@ -333,11 +333,6 @@ impl StreamProcessorTrait for JsonStreamProcessor { data: data.clone(), }); } - ContentBlock::CompactionSummary { text, .. } => { - fragments.push(DisplayFragment::CompactionDivider { - summary: text.clone(), - }); - } } } } diff --git a/crates/code_assistant/src/ui/streaming/xml_processor.rs b/crates/code_assistant/src/ui/streaming/xml_processor.rs index cf4352a0..38df8d52 100644 --- a/crates/code_assistant/src/ui/streaming/xml_processor.rs +++ b/crates/code_assistant/src/ui/streaming/xml_processor.rs @@ -265,11 +265,6 @@ impl StreamProcessorTrait for XmlStreamProcessor { data: data.clone(), }); } - ContentBlock::CompactionSummary { text, .. } => { - fragments.push(DisplayFragment::CompactionDivider { - summary: text.clone(), - }); - } } } } diff --git a/crates/llm/src/anthropic.rs b/crates/llm/src/anthropic.rs index e61ae826..8f45d6bc 100644 --- a/crates/llm/src/anthropic.rs +++ b/crates/llm/src/anthropic.rs @@ -187,11 +187,6 @@ impl DefaultMessageConverter { AnthropicBlockContent::RedactedThinking { data }, false, ), - ContentBlock::CompactionSummary { text, .. } => ( - "text".to_string(), - AnthropicBlockContent::Text { text }, - true, - ), }; let cache_control = @@ -1143,10 +1138,6 @@ impl AnthropicClient { ContentBlock::ToolResult { end_time, .. } => { *end_time = Some(now); } - ContentBlock::CompactionSummary { text, end_time, .. } => { - *text = current_content.clone(); - *end_time = Some(now); - } } } _ => {} @@ -1197,10 +1188,6 @@ impl AnthropicClient { } *end_time = Some(now); } - ContentBlock::CompactionSummary { text, end_time, .. } => { - *text = current_content.clone(); - *end_time = Some(now); - } _ => {} } } @@ -1233,8 +1220,7 @@ impl AnthropicClient { | ContentBlock::Text { end_time, .. } | ContentBlock::Image { end_time, .. } | ContentBlock::ToolUse { end_time, .. } - | ContentBlock::ToolResult { end_time, .. } - | ContentBlock::CompactionSummary { end_time, .. } => { + | ContentBlock::ToolResult { end_time, .. } => { if end_time.is_none() { *end_time = Some(now); } diff --git a/crates/llm/src/display.rs b/crates/llm/src/display.rs index f3704bd3..55142ce5 100644 --- a/crates/llm/src/display.rs +++ b/crates/llm/src/display.rs @@ -65,9 +65,6 @@ impl fmt::Display for ContentBlock { writeln!(f, "RedactedThinking")?; writeln!(f, " Data: {}", data.replace('\n', "\n ")) } - ContentBlock::CompactionSummary { text, .. } => { - writeln!(f, "CompactionSummary: {}", text.replace('\n', "\n ")) - } } } } diff --git a/crates/llm/src/openai_responses.rs b/crates/llm/src/openai_responses.rs index 8105cad6..c7568b1a 100644 --- a/crates/llm/src/openai_responses.rs +++ b/crates/llm/src/openai_responses.rs @@ -468,14 +468,6 @@ impl OpenAIResponsesClient { .push(ResponseContentItem::OutputText { text: thinking }); } }, - ContentBlock::CompactionSummary { text, .. } => match role { - MessageRole::User => { - current_message_content.push(ResponseContentItem::InputText { text }); - } - MessageRole::Assistant => { - current_message_content.push(ResponseContentItem::OutputText { text }); - } - }, // Non-message content blocks: flush current message and add as separate items ContentBlock::ToolResult { tool_use_id, diff --git a/crates/llm/src/types.rs b/crates/llm/src/types.rs index da4afbb3..30d3753c 100644 --- a/crates/llm/src/types.rs +++ b/crates/llm/src/types.rs @@ -89,8 +89,8 @@ impl Default for Message { Self { role: MessageRole::User, content: MessageContent::Text(String::new()), - usage: None, request_id: None, + usage: None, is_compaction_summary: false, } } @@ -177,14 +177,6 @@ pub enum ContentBlock { #[serde(skip_serializing_if = "Option::is_none")] end_time: Option, }, - #[serde(rename = "compaction_summary")] - CompactionSummary { - text: String, - #[serde(skip_serializing_if = "Option::is_none")] - start_time: Option, - #[serde(skip_serializing_if = "Option::is_none")] - end_time: Option, - }, } /// Rate limit information extracted from LLM provider headers @@ -330,15 +322,6 @@ impl ContentBlock { } } - /// Create a compaction summary block from a String - pub fn new_compaction_summary(text: impl Into) -> Self { - ContentBlock::CompactionSummary { - text: text.into(), - start_time: None, - end_time: None, - } - } - /// Create an image content block from raw image data pub fn new_image(media_type: impl Into, data: impl AsRef<[u8]>) -> Self { use base64::Engine as _; @@ -406,7 +389,6 @@ impl ContentBlock { ContentBlock::Image { start_time, .. } => *start_time, ContentBlock::ToolUse { start_time, .. } => *start_time, ContentBlock::ToolResult { start_time, .. } => *start_time, - ContentBlock::CompactionSummary { start_time, .. } => *start_time, } } @@ -419,7 +401,6 @@ impl ContentBlock { ContentBlock::Image { end_time, .. } => *end_time, ContentBlock::ToolUse { end_time, .. } => *end_time, ContentBlock::ToolResult { end_time, .. } => *end_time, - ContentBlock::CompactionSummary { end_time, .. } => *end_time, } } @@ -432,7 +413,6 @@ impl ContentBlock { ContentBlock::Image { start_time, .. } => *start_time = Some(time), ContentBlock::ToolUse { start_time, .. } => *start_time = Some(time), ContentBlock::ToolResult { start_time, .. } => *start_time = Some(time), - ContentBlock::CompactionSummary { start_time, .. } => *start_time = Some(time), } self } @@ -446,7 +426,6 @@ impl ContentBlock { ContentBlock::Image { end_time, .. } => *end_time = Some(time), ContentBlock::ToolUse { end_time, .. } => *end_time = Some(time), ContentBlock::ToolResult { end_time, .. } => *end_time = Some(time), - ContentBlock::CompactionSummary { end_time, .. } => *end_time = Some(time), } self } @@ -555,10 +534,6 @@ impl ContentBlock { .. }, ) => a_id == b_id && a_content == b_content && a_error == b_error, - ( - ContentBlock::CompactionSummary { text: a_text, .. }, - ContentBlock::CompactionSummary { text: b_text, .. }, - ) => a_text == b_text, _ => false, // Different variants } } diff --git a/docs/context-compaction.md b/docs/context-compaction.md index 48e47460..9333ae9a 100644 --- a/docs/context-compaction.md +++ b/docs/context-compaction.md @@ -6,13 +6,13 @@ This document outlines the phased approach for adding automatic context compacti - Require a `context_token_limit` field in every model entry in `models.json`. - Update the configuration loader (`crates/llm/src/provider_config.rs`) and validation logic to deserialize, store, and surface this limit. - Propagate the limit into `SessionModelConfig` (`crates/code_assistant/src/persistence.rs`) and ensure session creation (`crates/code_assistant/src/session/manager.rs`) records it. -- Introduce `ContentBlock::CompactionSummary { text: String }` in `crates/llm/src/types.rs` and adjust serialization, deserialization, and any exhaustive `match` arms that enumerate block variants. +- Extend `llm::Message` with an `is_compaction_summary` flag (serde-defaulted to `false`) so we can tag summary messages without adding new content block variants. - **Tests:** extend existing configuration loading tests (or add new ones) to assert `context_token_limit` is required and correctly parsed; add coverage verifying the new content block round-trips through serialization. ## Phase 2 – Agent Compaction Logic - Add helpers in `crates/code_assistant/src/agent/runner.rs` to read the context limit, calculate the percent of the window consumed based on the latest assistant `Usage`, and define a compaction threshold (e.g., 80%). - Before building an `LLMRequest` in `run_single_iteration`, detect when the threshold is exceeded. -- When triggered, inject a system-authored prompt requesting a detailed summary, send it to the LLM, and store the response as an assistant message containing `ContentBlock::CompactionSummary`. +- When triggered, inject a system-authored prompt requesting a detailed summary, send it to the LLM without streaming, and store the response as a user message tagged with `is_compaction_summary`. - Adjust the message-preparation path (`render_tool_results_in_messages` and any related helpers) so the next LLM request only includes messages from the last compaction summary onward, while keeping the full `message_history` for persistence and UI. - **Tests:** add unit coverage to assert the compaction branch fires when expected, the summary block is stored correctly, and filtering logic feeds only post-summary messages to the provider. @@ -23,7 +23,7 @@ This document outlines the phased approach for adding automatic context compacti ## Phase 4 – UI Presentation - Extend `DisplayFragment` with `CompactionDivider` in `crates/code_assistant/src/ui/streaming/mod.rs`. -- Update stream processors (`json_processor.rs`, `xml_processor.rs`, `caret_processor.rs`) to emit the divider fragment for `ContentBlock::CompactionSummary`. +- Update stream processors (`json_processor.rs`, `xml_processor.rs`, `caret_processor.rs`) to emit the divider fragment when converting messages tagged with `is_compaction_summary`. - Enhance GPUI components: - Add a collapsible divider block in `crates/code_assistant/src/ui/gpui/elements.rs` showing the “conversation compacted” banner and the summary text. - Ensure `MessagesView` (`crates/code_assistant/src/ui/gpui/messages.rs`) handles the fragment, including expand/collapse state management. @@ -38,7 +38,7 @@ This document outlines the phased approach for adding automatic context compacti ─ Worked for 1h 18m 34s ────────────────────────────────────────────────────────────────────────────────────────────────── -• - Added automatic compaction support: when prior assistant usage crosses the configured context window threshold, we now inject a summary request, persist the result as ContentBlock::CompactionSummary, and refresh the UI so a collapsible “conversation compacted” banner appears (crates/code_assistant/src/agent/runner.rs#L1239, crates/code_assistant/src/ui/gpui/elements.rs#L1488, crates/code_assistant/src/ui/gpui/mod.rs#L1016). +• - Added automatic compaction support: when prior assistant usage crosses the configured context window threshold, we now inject a summary request, persist the result as a user message tagged with `is_compaction_summary`, and refresh the UI so a collapsible “conversation compacted” banner appears (see `crates/code_assistant/src/agent/runner.rs`, `crates/code_assistant/src/ui/gpui/elements.rs`, and `crates/code_assistant/src/ui/gpui/mod.rs`). - Introduced context_token_limit to the model pipeline, loading it from the shared provider configuration whenever the agent needs it (crates/llm/src/provider_config.rs#L26, crates/code_assistant/src/agent/runner.rs#L1239) instead of persisting it per session. - Updated streaming/terminal/ACP surfaces to understand the new compaction summary block so replay, terminal, and ACP conversions all remain consistent (see crates/llm/src/anthropic.rs#L170, crates/code_assistant/src/ui/streaming/json_processor.rs#L311, crates/code_assistant/src/ui/terminal/ui.rs#L420, crates/code_assistant/src/acp/ui.rs#L130). - Documented the rollout as a phased plan in docs/context-compaction.md and added focused unit coverage (crates/code_assistant/src/agent/tests.rs#L780) validating summary insertion and UI refresh. From cf8ceda4986e5986ddd59692875afa5459c29973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 18:30:21 +0100 Subject: [PATCH 09/14] Use ..Default::default() in message constructors --- crates/llm/src/types.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/llm/src/types.rs b/crates/llm/src/types.rs index 1a2ea485..902aba97 100644 --- a/crates/llm/src/types.rs +++ b/crates/llm/src/types.rs @@ -307,8 +307,7 @@ impl Message { Self { role: MessageRole::User, content: MessageContent::Text(text.into()), - request_id: None, - usage: None, + ..Default::default() } } @@ -316,8 +315,7 @@ impl Message { Self { role: MessageRole::Assistant, content: MessageContent::Text(text.into()), - request_id: None, - usage: None, + ..Default::default() } } @@ -325,8 +323,7 @@ impl Message { Self { role: MessageRole::User, content: MessageContent::Structured(content), - request_id: None, - usage: None, + ..Default::default() } } @@ -334,8 +331,7 @@ impl Message { Self { role: MessageRole::Assistant, content: MessageContent::Structured(content), - request_id: None, - usage: None, + ..Default::default() } } From 9f0a990fd5e99f5ed594ca18f9575361e4652409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 18:37:47 +0100 Subject: [PATCH 10/14] Use new constructors --- crates/code_assistant/src/agent/tests.rs | 32 +++++++++--------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index 5ddc093f..4c7bc603 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -779,26 +779,18 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { ); agent.set_test_context_limit(100); - agent.append_message(Message { - role: MessageRole::User, - content: MessageContent::Text("User request".to_string()), - ..Default::default() - })?; - - agent.append_message(Message { - role: MessageRole::Assistant, - content: MessageContent::Structured(vec![ContentBlock::new_text( - "Assistant reply".to_string(), - )]), - request_id: Some(1), - usage: Some(Usage { - input_tokens: 85, - output_tokens: 12, - cache_creation_input_tokens: 0, - cache_read_input_tokens: 0, - }), - ..Default::default() - })?; + agent.append_message(Message::new_user("User request"))?; + + agent.append_message( + Message::new_assistant_content(vec![ContentBlock::new_text("Assistant reply")]) + .with_request_id(1) + .with_usage(Usage { + input_tokens: 85, + output_tokens: 12, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }), + )?; agent.run_single_iteration().await?; From bd9ac73f2bd0e753dfccc9d3f48202eab76c8d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 18:53:52 +0100 Subject: [PATCH 11/14] Add test for assertig compaction ignores previous compactions --- crates/code_assistant/src/agent/tests.rs | 138 +++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index 4c7bc603..2fcabd07 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -848,6 +848,144 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_context_compaction_uses_only_messages_after_previous_summary() -> Result<()> { + let new_summary_text = "Second compaction summary"; + let previous_summary_text = "Earlier summary text"; + let old_user_text = "Old user request before compaction"; + let old_assistant_text = "Old assistant response before compaction"; + let post_summary_user_text = "User request after previous compaction"; + let post_summary_assistant_text = + "Assistant response after compaction that pushed us over the limit"; + + let summary_response = LLMResponse { + content: vec![ContentBlock::new_text(new_summary_text)], + usage: Usage { + input_tokens: 15, + output_tokens: 6, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + rate_limit_info: None, + }; + + let idle_response = LLMResponse { + content: Vec::new(), + usage: Usage::zero(), + rate_limit_info: None, + }; + + let mock_llm = MockLLMProvider::new(vec![Ok(idle_response), Ok(summary_response)]); + let mock_llm_ref = mock_llm.clone(); + + let ui = Arc::new(MockUI::default()); + + let components = AgentComponents { + llm_provider: Box::new(mock_llm), + project_manager: Box::new(MockProjectManager::new()), + command_executor: Box::new(create_command_executor_mock()), + ui: ui.clone(), + state_persistence: Box::new(MockStatePersistence::new()), + }; + + let session_config = SessionConfig { + init_path: Some(PathBuf::from("./test_path")), + initial_project: String::new(), + tool_syntax: ToolSyntax::Native, + use_diff_blocks: false, + }; + + let mut agent = Agent::new(components, session_config); + agent.disable_naming_reminders(); + agent.set_test_session_metadata( + "session-1".to_string(), + SessionModelConfig::new_for_tests("test-model".to_string()), + ); + agent.set_test_context_limit(100); + + // Seed history before the first compaction + agent.append_message(Message::new_user(old_user_text))?; + agent.append_message( + Message::new_assistant(old_assistant_text) + .with_request_id(1) + .with_usage(Usage { + input_tokens: 20, + output_tokens: 10, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }), + )?; + + // Simulate an earlier compaction summary + agent.append_message(Message { + role: MessageRole::User, + content: MessageContent::Text(previous_summary_text.to_string()), + is_compaction_summary: true, + ..Default::default() + })?; + + // Add conversation after the previous compaction that should stay active + agent.append_message(Message::new_user(post_summary_user_text))?; + agent.append_message( + Message::new_assistant_content(vec![ContentBlock::new_text(post_summary_assistant_text)]) + .with_request_id(2) + .with_usage(Usage { + input_tokens: 85, + output_tokens: 12, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }), + )?; + + agent.run_single_iteration().await?; + + let requests = mock_llm_ref.get_requests(); + assert_eq!(requests.len(), 2); + let compaction_request = &requests[0]; + + let message_contains = |message: &Message, needle: &str| -> bool { + match &message.content { + MessageContent::Text(text) => text.contains(needle), + MessageContent::Structured(blocks) => blocks.iter().any(|block| match block { + ContentBlock::Text { text, .. } => text.contains(needle), + ContentBlock::Thinking { thinking, .. } => thinking.contains(needle), + ContentBlock::ToolResult { content, .. } => content.contains(needle), + _ => false, + }), + } + }; + + let request_contains = |needle: &str| -> bool { + compaction_request + .messages + .iter() + .any(|message| message_contains(message, needle)) + }; + + assert!( + !request_contains(old_user_text), + "Compaction request should skip messages before the previous summary", + ); + assert!( + !request_contains(old_assistant_text), + "Compaction request should skip assistant replies before the previous summary", + ); + assert!( + request_contains(previous_summary_text), + "Compaction request should include the most recent summary to preserve context", + ); + assert!( + request_contains(post_summary_user_text), + "Compaction request should include user messages after the previous summary", + ); + assert!( + request_contains(post_summary_assistant_text), + "Compaction request should include assistant replies after the previous summary", + ); + + Ok(()) +} + #[test] fn test_ui_filtering_with_failed_tool_messages() -> Result<()> { use crate::persistence::ChatSession; From 025cc35f8faac43833d32e2c0c75dff5d20551cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 19:05:01 +0100 Subject: [PATCH 12/14] Don't include compaction prompt in history --- .../resources/compaction_prompt.md | 8 ++ crates/code_assistant/src/agent/runner.rs | 13 +-- crates/code_assistant/src/agent/tests.rs | 94 +++++++++++++++++++ 3 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 crates/code_assistant/resources/compaction_prompt.md diff --git a/crates/code_assistant/resources/compaction_prompt.md b/crates/code_assistant/resources/compaction_prompt.md new file mode 100644 index 00000000..f2213526 --- /dev/null +++ b/crates/code_assistant/resources/compaction_prompt.md @@ -0,0 +1,8 @@ + +The conversation history is nearing the model's context window limit. Provide a thorough summary that allows resuming the task without the earlier messages. Include: +- The current objectives or tasks. +- Key actions taken so far and their outcomes. +- Important files, commands, or decisions that matter for continuing. +- Outstanding questions or follow-up work that still needs attention. +Respond with plain text only. + diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index 9ea3bf2b..e470a380 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -76,14 +76,7 @@ pub struct Agent { } const CONTEXT_USAGE_THRESHOLD: f32 = 0.8; -const CONTEXT_COMPACTION_PROMPT: &str = r#" -The conversation history is nearing the model's context window limit. Provide a thorough summary that allows resuming the task without the earlier messages. Include: -- The current objectives or tasks. -- Key actions taken so far and their outcomes. -- Important files, commands, or decisions that matter for continuing. -- Outstanding questions or follow-up work that still needs attention. -Respond with plain text only. -"#; +const CONTEXT_COMPACTION_PROMPT: &str = include_str!("../../resources/compaction_prompt.md"); impl Agent { /// Formats an error, particularly ToolErrors, into a user-friendly string. @@ -1191,9 +1184,9 @@ impl Agent { content: MessageContent::Text(CONTEXT_COMPACTION_PROMPT.to_string()), ..Default::default() }; - self.append_message(compaction_message)?; - let messages = self.render_tool_results_in_messages(); + let mut messages = self.render_tool_results_in_messages(); + messages.push(compaction_message); let (response, _) = self.get_non_streaming_response(messages).await?; let summary_text = Self::extract_compaction_summary_text(&response.content); diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index 2fcabd07..ac81cab2 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -848,6 +848,100 @@ async fn test_context_compaction_inserts_summary() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_compaction_prompt_not_persisted_in_history() -> Result<()> { + let summary_text = "Summary to store after compaction"; + let summary_response = LLMResponse { + content: vec![ContentBlock::new_text(summary_text)], + usage: Usage { + input_tokens: 20, + output_tokens: 8, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + rate_limit_info: None, + }; + + let idle_response = LLMResponse { + content: Vec::new(), + usage: Usage::zero(), + rate_limit_info: None, + }; + + let mock_llm = MockLLMProvider::new(vec![Ok(idle_response), Ok(summary_response)]); + + let components = AgentComponents { + llm_provider: Box::new(mock_llm), + project_manager: Box::new(MockProjectManager::new()), + command_executor: Box::new(create_command_executor_mock()), + ui: Arc::new(MockUI::default()), + state_persistence: Box::new(MockStatePersistence::new()), + }; + + let session_config = SessionConfig { + init_path: Some(PathBuf::from("./test_path")), + initial_project: String::new(), + tool_syntax: ToolSyntax::Native, + use_diff_blocks: false, + }; + + let mut agent = Agent::new(components, session_config); + agent.disable_naming_reminders(); + agent.set_test_session_metadata( + "session-1".to_string(), + SessionModelConfig::new_for_tests("test-model".to_string()), + ); + agent.set_test_context_limit(100); + + agent.append_message(Message::new_user("User request"))?; + agent.append_message( + Message::new_assistant_content(vec![ContentBlock::new_text( + "Assistant reply pushing over limit", + )]) + .with_request_id(1) + .with_usage(Usage { + input_tokens: 85, + output_tokens: 12, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }), + )?; + + agent.run_single_iteration().await?; + + let compaction_prompt = include_str!("../../resources/compaction_prompt.md"); + let history_contains_prompt = + agent + .message_history_for_tests() + .iter() + .any(|message| match &message.content { + MessageContent::Text(text) => text.contains(compaction_prompt), + MessageContent::Structured(blocks) => blocks.iter().any(|block| match block { + ContentBlock::Text { text, .. } => text.contains(compaction_prompt), + ContentBlock::Thinking { thinking, .. } => thinking.contains(compaction_prompt), + ContentBlock::ToolResult { content, .. } => content.contains(compaction_prompt), + _ => false, + }), + }); + + assert!( + !history_contains_prompt, + "Compaction prompt should not be persisted in the session history", + ); + + // Still ensure the summary made it into history for future iterations + let has_summary = agent.message_history_for_tests().iter().any(|message| { + message.is_compaction_summary + && matches!(&message.content, MessageContent::Text(text) if text == summary_text) + }); + assert!( + has_summary, + "Compaction summary should be stored in history" + ); + + Ok(()) +} + #[tokio::test] async fn test_context_compaction_uses_only_messages_after_previous_summary() -> Result<()> { let new_summary_text = "Second compaction summary"; From 6d503d706b01671c5a4b57b9375466f7ba9658df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 21:08:25 +0100 Subject: [PATCH 13/14] Make sure we display compaction divider fragment also during streaming --- crates/code_assistant/src/acp/ui.rs | 1 + crates/code_assistant/src/agent/runner.rs | 20 +++++++++++++- crates/code_assistant/src/agent/tests.rs | 25 +++++++++++++++++- crates/code_assistant/src/session/instance.rs | 9 +++++++ crates/code_assistant/src/ui/gpui/mod.rs | 26 +++++++++++++++---- crates/code_assistant/src/ui/terminal/ui.rs | 14 ++++++++-- crates/code_assistant/src/ui/ui_events.rs | 4 +++ 7 files changed, 90 insertions(+), 9 deletions(-) diff --git a/crates/code_assistant/src/acp/ui.rs b/crates/code_assistant/src/acp/ui.rs index 0bbaab12..202acbaa 100644 --- a/crates/code_assistant/src/acp/ui.rs +++ b/crates/code_assistant/src/acp/ui.rs @@ -622,6 +622,7 @@ impl UserInterface for ACPUserUI { // Events that don't translate to ACP UiEvent::UpdateMemory { .. } | UiEvent::SetMessages { .. } + | UiEvent::DisplayCompactionSummary { .. } | UiEvent::StreamingStarted(_) | UiEvent::StreamingStopped { .. } | UiEvent::RefreshChatList diff --git a/crates/code_assistant/src/agent/runner.rs b/crates/code_assistant/src/agent/runner.rs index e470a380..f2f8222b 100644 --- a/crates/code_assistant/src/agent/runner.rs +++ b/crates/code_assistant/src/agent/runner.rs @@ -2,6 +2,7 @@ use crate::agent::persistence::AgentStatePersistence; use crate::agent::types::ToolExecution; use crate::config::ProjectManager; use crate::persistence::{ChatMetadata, SessionModelConfig}; +use crate::session::instance::SessionActivityState; use crate::session::SessionConfig; use crate::tools::core::{ResourcesTracker, ToolContext, ToolRegistry, ToolScope}; use crate::tools::{generate_system_message, ParserRegistry, ToolRequest}; @@ -199,6 +200,18 @@ impl Agent { } } + async fn update_activity_state(&self, new_state: SessionActivityState) -> Result<()> { + if let Some(session_id) = &self.session_id { + self.ui + .send_event(UiEvent::UpdateSessionActivityState { + session_id: session_id.clone(), + activity_state: new_state, + }) + .await?; + } + Ok(()) + } + /// Build current session metadata fn build_current_metadata(&self) -> Option { // Only build metadata if we have a session ID @@ -1187,7 +1200,12 @@ impl Agent { let mut messages = self.render_tool_results_in_messages(); messages.push(compaction_message); - let (response, _) = self.get_non_streaming_response(messages).await?; + self.update_activity_state(SessionActivityState::WaitingForResponse) + .await?; + let response_result = self.get_non_streaming_response(messages).await; + self.update_activity_state(SessionActivityState::AgentRunning) + .await?; + let (response, _) = response_result?; let summary_text = Self::extract_compaction_summary_text(&response.content); let summary_message = Message { diff --git a/crates/code_assistant/src/agent/tests.rs b/crates/code_assistant/src/agent/tests.rs index ac81cab2..804731cd 100644 --- a/crates/code_assistant/src/agent/tests.rs +++ b/crates/code_assistant/src/agent/tests.rs @@ -1,6 +1,7 @@ use super::*; use crate::agent::persistence::MockStatePersistence; use crate::persistence::SessionModelConfig; +use crate::session::instance::SessionActivityState; use crate::session::SessionConfig; use crate::tests::mocks::MockLLMProvider; use crate::tests::mocks::{ @@ -9,6 +10,7 @@ use crate::tests::mocks::{ }; use crate::tests::utils::parse_and_truncate_llm_response; use crate::types::*; +use crate::ui::ui_events::UiEvent; use anyhow::Result; use llm::types::*; use std::path::PathBuf; @@ -869,12 +871,13 @@ async fn test_compaction_prompt_not_persisted_in_history() -> Result<()> { }; let mock_llm = MockLLMProvider::new(vec![Ok(idle_response), Ok(summary_response)]); + let ui = Arc::new(MockUI::default()); let components = AgentComponents { llm_provider: Box::new(mock_llm), project_manager: Box::new(MockProjectManager::new()), command_executor: Box::new(create_command_executor_mock()), - ui: Arc::new(MockUI::default()), + ui: ui.clone(), state_persistence: Box::new(MockStatePersistence::new()), }; @@ -939,6 +942,26 @@ async fn test_compaction_prompt_not_persisted_in_history() -> Result<()> { "Compaction summary should be stored in history" ); + let events = ui.events(); + let observed_states: Vec<_> = events + .iter() + .filter_map(|event| { + if let UiEvent::UpdateSessionActivityState { activity_state, .. } = event { + Some(activity_state.clone()) + } else { + None + } + }) + .collect(); + assert!( + observed_states.contains(&SessionActivityState::WaitingForResponse), + "Compaction should set activity state to WaitingForResponse" + ); + assert!( + observed_states.contains(&SessionActivityState::AgentRunning), + "Compaction should restore activity state to AgentRunning" + ); + Ok(()) } diff --git a/crates/code_assistant/src/session/instance.rs b/crates/code_assistant/src/session/instance.rs index e60e1c85..976d4858 100644 --- a/crates/code_assistant/src/session/instance.rs +++ b/crates/code_assistant/src/session/instance.rs @@ -517,6 +517,15 @@ impl UserInterface for ProxyUI { // The agent task will set the state to Idle when it terminates } } + UiEvent::UpdateSessionActivityState { + session_id, + activity_state, + } => { + if session_id == &self.session_id { + self.update_activity_state(activity_state.clone()); + return Ok(()); + } + } _ => {} } diff --git a/crates/code_assistant/src/ui/gpui/mod.rs b/crates/code_assistant/src/ui/gpui/mod.rs index 91e4a9d7..dfab9916 100644 --- a/crates/code_assistant/src/ui/gpui/mod.rs +++ b/crates/code_assistant/src/ui/gpui/mod.rs @@ -511,6 +511,20 @@ impl Gpui { cx.notify(); }); } + UiEvent::DisplayCompactionSummary { summary } => { + let mut queue = self.message_queue.lock().unwrap(); + let result = cx.new(|cx| { + let message = MessageContainer::with_role(MessageRole::User, cx); + message.add_compaction_divider(summary.clone(), cx); + message + }); + if let Ok(new_message) = result { + queue.push(new_message); + } else { + warn!("Failed to create compaction summary message"); + } + cx.refresh().expect("Failed to refresh windows"); + } UiEvent::AppendToTextBlock { content } => { // Since StreamingStarted ensures last container is Assistant, we can safely append self.update_last_message(cx, |message, cx| { @@ -1390,6 +1404,9 @@ impl UserInterface for Gpui { delta: delta.clone(), }); } + DisplayFragment::ReasoningComplete => { + self.push_event(UiEvent::CompleteReasoning); + } DisplayFragment::ToolOutput { tool_id, chunk } => { if tool_id.is_empty() { warn!( @@ -1414,11 +1431,10 @@ impl UserInterface for Gpui { "GPUI: Tool {tool_id} attached terminal {terminal_id}; no dedicated UI hook" ); } - DisplayFragment::CompactionDivider { .. } => { - // Compaction summaries are handled via refresh events rather than streaming. - } - DisplayFragment::ReasoningComplete => { - self.push_event(UiEvent::CompleteReasoning); + DisplayFragment::CompactionDivider { summary } => { + self.push_event(UiEvent::DisplayCompactionSummary { + summary: summary.clone(), + }); } } diff --git a/crates/code_assistant/src/ui/terminal/ui.rs b/crates/code_assistant/src/ui/terminal/ui.rs index 3680183e..4e1cfece 100644 --- a/crates/code_assistant/src/ui/terminal/ui.rs +++ b/crates/code_assistant/src/ui/terminal/ui.rs @@ -208,6 +208,14 @@ impl UserInterface for TerminalTuiUI { } } } + UiEvent::DisplayCompactionSummary { summary } => { + debug!("Displaying compaction summary"); + if let Some(renderer) = self.renderer.lock().await.as_ref() { + let mut renderer_guard = renderer.lock().await; + let formatted = format!("\n\n[conversation compacted]\n{summary}\n",); + let _ = renderer_guard.add_instruction_message(&formatted); + } + } UiEvent::StreamingStarted(request_id) => { debug!("Streaming started for request {}", request_id); self.cancel_flag.store(false, Ordering::SeqCst); @@ -422,8 +430,10 @@ impl UserInterface for TerminalTuiUI { "Tool {tool_id} attached client terminal {terminal_id}; terminal UI has no live view" ); } - DisplayFragment::CompactionDivider { .. } => { - // Compaction summaries are surfaced via refresh events in terminal UI. + DisplayFragment::CompactionDivider { summary } => { + self.push_event(UiEvent::DisplayCompactionSummary { + summary: summary.clone(), + }); } DisplayFragment::ReasoningComplete => { // For terminal UI, no specific action needed for reasoning completion diff --git a/crates/code_assistant/src/ui/ui_events.rs b/crates/code_assistant/src/ui/ui_events.rs index 07b9ef1d..23fe02b4 100644 --- a/crates/code_assistant/src/ui/ui_events.rs +++ b/crates/code_assistant/src/ui/ui_events.rs @@ -28,6 +28,10 @@ pub enum UiEvent { content: String, attachments: Vec, }, + /// Display a system-generated compaction divider message + DisplayCompactionSummary { + summary: String, + }, /// Append to the last text block AppendToTextBlock { content: String }, /// Append to the last thinking block From 4534a60435ec493055d5c142a471ca54a2c3a513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= Date: Sat, 8 Nov 2025 21:14:54 +0100 Subject: [PATCH 14/14] cargo fmt --- crates/code_assistant/src/ui/ui_events.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/code_assistant/src/ui/ui_events.rs b/crates/code_assistant/src/ui/ui_events.rs index 23fe02b4..3f348ad1 100644 --- a/crates/code_assistant/src/ui/ui_events.rs +++ b/crates/code_assistant/src/ui/ui_events.rs @@ -29,9 +29,7 @@ pub enum UiEvent { attachments: Vec, }, /// Display a system-generated compaction divider message - DisplayCompactionSummary { - summary: String, - }, + DisplayCompactionSummary { summary: String }, /// Append to the last text block AppendToTextBlock { content: String }, /// Append to the last thinking block