diff --git a/.gitignore b/.gitignore deleted file mode 100644 index eafc87270..000000000 --- a/.gitignore +++ /dev/null @@ -1,197 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -# Note: We keep our custom spec file in version control -# *.spec - -# PyInstaller build directories -build/ -dist/ - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be added to the global gitignore or merged into this project gitignore. For a PyCharm -# project, it is recommended to ignore the entire .idea directory. -.idea/ - -# VS Code -.vscode/ - -# macOS -.DS_Store -.AppleDouble -.LSOverride - -# Windows -Thumbs.db -ehthumbs.db -Desktop.ini -$RECYCLE.BIN/ - -# Linux -*~ - -# Temporary files -*.tmp -*.temp -*.swp -*.swo - -# UV specific -.uv/ - -# Project specific -*.log -.coverage -.pytest_cache/ -software-agent-sdk/ -snapshot_report.html diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 000000000..eacf14679 --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,126 @@ +# the name by which the project can be referenced within Serena +project_name: "IronHands-CLI" + + +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp +# csharp_omnisharp dart elixir elm erlang +# fortran fsharp go groovy haskell +# java julia kotlin lua markdown +# matlab nix pascal perl php +# php_phpactor powershell python python_jedi r +# rego ruby ruby_solargraph rust scala +# swift terraform toml typescript typescript_vts +# vue yaml zig +# (This list may be outdated. For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# Some languages require additional setup/installations. +# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- python + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# The language backend to use for this project. +# If not set, the global setting from serena_config.yml is used. +# Valid values: LSP, JetBrains +# Note: the backend is fixed at startup. If a project with a different backend +# is activated post-init, an error will be returned. +language_backend: + +# whether to use project's .gitignore files to ignore files +ignore_all_files_in_gitignore: true + +# list of additional paths to ignore in this project. +# Same syntax as gitignore, so you can use * and **. +# Note: global ignored_paths from serena_config.yml are also applied additively. +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details. +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project by name. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_lines`: Deletes a range of lines within a file. +# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. +# * `execute_shell_command`: Executes a shell command. +# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. +# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). +# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Gets the initial instructions for the current project. +# Should only be used in settings where the system prompt cannot be set, +# e.g. in clients you have no control over, like Claude Desktop. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_at_line`: Inserts content at a given line in a file. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. +# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default) +included_optional_tools: [] + +# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. +# This cannot be combined with non-empty excluded_tools or included_optional_tools. +fixed_tools: [] + +# list of mode names to that are always to be included in the set of active modes +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this setting overrides the global configuration. +# Set this to [] to disable base modes for this project. +# Set this to a list of mode names to always include the respective modes for this project. +base_modes: + +# list of mode names that are to be activated by default. +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# This setting can, in turn, be overridden by CLI parameters (--mode). +default_modes: + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +# time budget (seconds) per tool call for the retrieval of additional symbol information +# such as docstrings or parameter information. +# This overrides the corresponding setting in the global configuration; see the documentation there. +# If null or missing, use the setting from the global configuration. +symbol_info_budget: diff --git a/openhands_cli/entrypoint.py b/openhands_cli/entrypoint.py index 479d311ec..e1be3cbae 100644 --- a/openhands_cli/entrypoint.py +++ b/openhands_cli/entrypoint.py @@ -11,17 +11,23 @@ import warnings from pathlib import Path -from dotenv import load_dotenv -from rich.console import Console +# BUG-005: Suppress RequestsDependencyWarning before third-party imports. +# The `requests` library (via urllib3) emits this warning at import time when +# charset_normalizer is used instead of chardet. Filter must be active before +# any transitive import of `requests` (e.g., via openhands_cli.stores). +warnings.filterwarnings("ignore", message=r"urllib3.*or chardet.*charset_normalizer") -from openhands_cli.argparsers.main_parser import create_main_parser -from openhands_cli.stores import ( +from dotenv import load_dotenv # noqa: E402 +from rich.console import Console # noqa: E402 + +from openhands_cli.argparsers.main_parser import create_main_parser # noqa: E402 +from openhands_cli.stores import ( # noqa: E402 MissingEnvironmentVariablesError, check_and_warn_env_vars, ) -from openhands_cli.terminal_compat import check_terminal_compatibility -from openhands_cli.theme import OPENHANDS_THEME -from openhands_cli.utils import create_seeded_instructions_from_args +from openhands_cli.terminal_compat import check_terminal_compatibility # noqa: E402 +from openhands_cli.theme import OPENHANDS_THEME # noqa: E402 +from openhands_cli.utils import create_seeded_instructions_from_args # noqa: E402 console = Console() @@ -47,6 +53,12 @@ def handle_resume_logic(args: argparse.Namespace) -> str | None: Returns: Conversation ID to resume, or None if it should show conversation list or exit """ + # BUG-003: Normalize `--resume last` (case-insensitive) to the --last code path. + # Users intuitively type `--resume last` instead of `--resume --last`. + if args.resume and args.resume.lower() == "last": + args.last = True + args.resume = "" + # Check if --last flag is used if args.last: if args.resume is None: diff --git a/openhands_cli/stores/cli_settings.py b/openhands_cli/stores/cli_settings.py index 709ba3e5a..f60249ceb 100644 --- a/openhands_cli/stores/cli_settings.py +++ b/openhands_cli/stores/cli_settings.py @@ -1,9 +1,12 @@ """CLI settings models and utilities.""" import json +import logging import os from pathlib import Path +logger = logging.getLogger(__name__) + from pydantic import BaseModel, field_validator @@ -51,8 +54,17 @@ class CliSettings(BaseModel): default_cells_expanded: bool = False auto_open_plan_panel: bool = True + replay_window_size: int = 200 critic: CriticSettings = CriticSettings() + @field_validator("replay_window_size") + @classmethod + def validate_replay_window_size(cls, v: int) -> int: + """Validate replay window size is between 10 and 1000.""" + if not 10 <= v <= 1000: + raise ValueError(f"Replay window size must be between 10 and 1000, got {v}") + return v + @classmethod def get_config_path(cls) -> Path: """Get the path to the CLI configuration file.""" @@ -128,8 +140,13 @@ def load(cls) -> "CliSettings": settings.save() return settings - except (json.JSONDecodeError, ValueError): + except (json.JSONDecodeError, ValueError) as exc: # If file is corrupted, return defaults + logger.warning( + "Failed to load CLI settings from %s: %s; using defaults", + config_path, + exc, + ) return cls() def save(self) -> None: diff --git a/openhands_cli/tui/core/__init__.py b/openhands_cli/tui/core/__init__.py index 9e7e23f45..6290bfaa1 100644 --- a/openhands_cli/tui/core/__init__.py +++ b/openhands_cli/tui/core/__init__.py @@ -11,6 +11,7 @@ ) from openhands_cli.tui.core.events import ( ConfirmationDecision, + LoadOlderEvents, RequestSwitchConfirmation, ShowConfirmationPanel, ) @@ -41,4 +42,5 @@ "RequestSwitchConfirmation", "ShowConfirmationPanel", "ConfirmationDecision", + "LoadOlderEvents", ] diff --git a/openhands_cli/tui/core/conversation_manager.py b/openhands_cli/tui/core/conversation_manager.py index 421553562..af6f96596 100644 --- a/openhands_cli/tui/core/conversation_manager.py +++ b/openhands_cli/tui/core/conversation_manager.py @@ -9,6 +9,7 @@ confirmation policy + resume flows, etc.). """ +import logging import uuid from typing import TYPE_CHECKING @@ -30,7 +31,11 @@ from openhands_cli.tui.core.conversation_switch_controller import ( ConversationSwitchController, ) -from openhands_cli.tui.core.events import ConfirmationDecision, ShowConfirmationPanel +from openhands_cli.tui.core.events import ( + ConfirmationDecision, + LoadOlderEvents, + ShowConfirmationPanel, +) from openhands_cli.tui.core.refinement_controller import RefinementController from openhands_cli.tui.core.runner_factory import RunnerFactory from openhands_cli.tui.core.runner_registry import RunnerRegistry @@ -109,6 +114,19 @@ class ConversationManager(Container): - UserMessageController: rendering + message send/queue behavior - ConfirmationPolicyService + ConfirmationFlowController: policy + resume flows - RefinementController: iterative refinement based on critic results + + Communication pattern + --------------------- + Most coordination flows through Textual's message-based dispatch + (``on_*`` handlers). Two methods bypass this pattern for synchronous + timing reasons: + + - ``ensure_runner()`` — eagerly creates a runner so that + ``replay_historical_events()`` fires before the first user message + (required during ``--resume`` startup ordering). + - ``reload_visualizer_configuration()`` — called directly by the app + after settings changes to reload the visualizer configuration + immediately rather than going through the message queue. """ def __init__( @@ -287,12 +305,33 @@ def _on_confirmation_decision(self, event: ConfirmationDecision) -> None: event.stop() self._confirmation_controller.handle_decision(event.decision) + @on(LoadOlderEvents) + def _on_load_older_events(self, event: LoadOlderEvents) -> None: + """Handle request to load older historical events.""" + event.stop() + runner = self._runners.current + if runner is None: + return + runner.load_older_events() + # ---- Public API for direct calls ---- async def send_message(self, content: str) -> None: """Send a message to the current conversation.""" self.post_message(SendMessage(content)) + def ensure_runner(self, conversation_id: uuid.UUID) -> None: + """Eagerly create a runner for a conversation, triggering event replay. + + BUG-001: During --resume startup, lazy runner creation means + replay_historical_events() never fires until the first user message. + This method forces early runner creation so historical events are + replayed immediately. + """ + logger = logging.getLogger(__name__) + logger.debug("ensure_runner: eagerly creating runner for %s", conversation_id) + self._runners.get_or_create(conversation_id) + def create_conversation(self) -> None: """Create a new conversation.""" self.post_message(CreateConversation()) diff --git a/openhands_cli/tui/core/conversation_runner.py b/openhands_cli/tui/core/conversation_runner.py index 9d4182409..5db9e5701 100644 --- a/openhands_cli/tui/core/conversation_runner.py +++ b/openhands_cli/tui/core/conversation_runner.py @@ -1,8 +1,12 @@ """Conversation runner with confirmation mode support.""" import asyncio +import logging +import time import uuid -from collections.abc import Callable +from collections import deque +from collections.abc import Callable, Iterable, Sequence +from dataclasses import dataclass from typing import TYPE_CHECKING from rich.console import Console @@ -21,6 +25,7 @@ ConversationState as SDKConversationState, ) from openhands.sdk.event.base import Event +from openhands.sdk.event.condenser import Condensation from openhands_cli.setup import setup_conversation from openhands_cli.tui.core.events import ShowConfirmationPanel from openhands_cli.tui.widgets.richlog_visualizer import ConversationVisualizer @@ -31,6 +36,21 @@ from openhands_cli.tui.core.state import ConversationContainer +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class ReplayPlan: + summary_text: str | None + tail_events: list[Event] + total_count: int + hidden_count: int + has_condensation: bool + condensed_count: int | None + loadable_events: list[Event] + loaded_start_index: int + + class ConversationRunner: """Conversation runner with non-blocking confirmation mode support. @@ -80,6 +100,8 @@ def __init__( ) self._running = False + self._replayed_event_offset = 0 + self._replay_complete = False # State for reading (is_confirmation_active) and updating (set_running) self._state = state @@ -266,6 +288,189 @@ def pause_runner_without_blocking(self): if self.is_running: asyncio.create_task(self.pause()) + def replay_historical_events(self) -> int: + """Replay historical events from the conversation into the visualizer. + + Uses a 3-level deterministic cascade: + 1) Condensation-aware summary + tail path + 2) Windowed tail path + 3) Full passthrough path (only when total <= window) + + Returns: + Count of replayed events, or 0 if already replayed or empty. + """ + if self._replay_complete: + logger.debug( + "replay_historical_events: skip (offset=%d)", + self._replayed_event_offset, + ) + return 0 + + events = self.conversation.state.events + total_count = len(events) + window_size = self.visualizer.cli_settings.replay_window_size + + # LOG-1: Entry + logger.debug( + "replay_historical_events: entry (offset=%d, events=%d, window=%d)", + self._replayed_event_offset, + total_count, + window_size, + ) + if total_count == 0: + logger.debug("replay_historical_events: exit — empty history") + return 0 + + t0 = time.monotonic() + plan = self._build_replay_plan(events, total_count) + + # LOG-2: Path selected + path = "condensation" if plan.has_condensation else ( + "window" if plan.hidden_count > 0 else "full" + ) + logger.debug( + "replay_historical_events: path=%s total=%d tail=%d hidden=%d condensed=%s", + path, plan.total_count, len(plan.tail_events), plan.hidden_count, + plan.condensed_count, + ) + + self.visualizer.set_replay_context( + all_events=plan.loadable_events, + loaded_start_index=plan.loaded_start_index, + summary_text=plan.summary_text, + has_condensation=plan.has_condensation, + condensed_count=plan.condensed_count, + ) + + if plan.hidden_count > 0: + self.visualizer.replay_with_summary( + summary_text=plan.summary_text, + tail_events=plan.tail_events, + total_count=plan.total_count, + hidden_count=plan.hidden_count, + has_condensation=plan.has_condensation, + condensed_count=plan.condensed_count, + ) + else: + self.visualizer.replay_events(plan.tail_events) + + self._replayed_event_offset = len(plan.tail_events) + self._replay_complete = True + elapsed_ms = (time.monotonic() - t0) * 1000 + + # LOG-3: Exit with metrics + logger.debug( + "replay_historical_events: exit — replayed=%d widgets, duration=%.1fms", + self._replayed_event_offset, elapsed_ms, + ) + return self._replayed_event_offset + + def _build_replay_plan( + self, events: Sequence[Event] | Iterable[Event], total_count: int + ) -> ReplayPlan: + """Build deterministic replay plan with condensation→window→full cascade.""" + window_size = self.visualizer.cli_settings.replay_window_size + + try: + condensation_plan = self._build_condensation_plan(events, total_count) + if condensation_plan is not None: + # LOG-4: Condensation path selected + logger.debug( + "_build_replay_plan: condensation path — summary=%s, forgotten=%s", + condensation_plan.summary_text is not None, + condensation_plan.condensed_count, + ) + return condensation_plan + except Exception as exc: + # LOG-5: Fallback triggered + logger.warning( + "_build_replay_plan: condensation failed, fallback to window/full: %s", + exc, + ) + + tail_events = self._extract_tail_events(events, min(window_size, total_count)) + hidden_count = max(0, total_count - len(tail_events)) + return ReplayPlan( + summary_text=None, + tail_events=tail_events, + total_count=total_count, + hidden_count=hidden_count, + has_condensation=False, + condensed_count=None, + loadable_events=list(events) if isinstance(events, Sequence) else tail_events, + loaded_start_index=max(0, total_count - len(tail_events)), + ) + + def _build_condensation_plan( + self, events: Sequence[Event] | Iterable[Event], total_count: int + ) -> ReplayPlan | None: + """Build summary+tail replay plan from latest Condensation event.""" + latest: Condensation | None = None + tail_source: list[Event] = [] + + if isinstance(events, Sequence): + sequence_events = list(events) + latest_idx = -1 + for idx in range(len(sequence_events) - 1, -1, -1): + candidate = sequence_events[idx] + if isinstance(candidate, Condensation): + latest = candidate + latest_idx = idx + break + + if latest is None: + return None + + tail_source = list(sequence_events[latest_idx + 1 :]) + else: + for event in events: + if isinstance(event, Condensation): + latest = event + tail_source = [] + continue + tail_source.append(event) + + if latest is None: + return None + + forgotten = set(latest.forgotten_event_ids) + tail_events = [ + event + for event in tail_source + if event.id not in forgotten and not isinstance(event, Condensation) + ] + + hidden_count = max(0, total_count - len(tail_events)) + loadable_events = tail_source + loaded_start_index = max(0, len(loadable_events) - len(tail_events)) + + return ReplayPlan( + summary_text=latest.summary, + tail_events=tail_events, + total_count=total_count, + hidden_count=hidden_count, + has_condensation=True, + condensed_count=len(forgotten), + loadable_events=loadable_events, + loaded_start_index=loaded_start_index, + ) + + def _extract_tail_events( + self, events: Sequence[Event] | Iterable[Event], window_size: int + ) -> list[Event]: + """Extract latest window with slice-preferred/deque-fallback strategy.""" + if window_size <= 0: + return [] + + if isinstance(events, Sequence): + return list(events[-window_size:]) + + return list(deque(events, maxlen=window_size)) + + def load_older_events(self) -> int: + """Load an older replay page through the visualizer.""" + return self.visualizer.load_older_events() + def get_conversation_summary(self) -> tuple[int, Text]: """Get a summary of the conversation for headless mode output. diff --git a/openhands_cli/tui/core/events.py b/openhands_cli/tui/core/events.py index a7c26107d..62a0bd3c4 100644 --- a/openhands_cli/tui/core/events.py +++ b/openhands_cli/tui/core/events.py @@ -65,3 +65,9 @@ class ConfirmationDecision(Message): def __init__(self, decision: UserConfirmation) -> None: super().__init__() self.decision = decision + + +class LoadOlderEvents(Message): + """Request loading an older page of historical events.""" + + pass diff --git a/openhands_cli/tui/core/runner_registry.py b/openhands_cli/tui/core/runner_registry.py index 81389043f..856de0ff2 100644 --- a/openhands_cli/tui/core/runner_registry.py +++ b/openhands_cli/tui/core/runner_registry.py @@ -41,6 +41,13 @@ def clear_current(self) -> None: self._current_runner = None def get_or_create(self, conversation_id: uuid.UUID) -> ConversationRunner: + """Return the cached runner for *conversation_id*, creating one if needed. + + Must be called from the main thread. A newly-created runner is + **not** cached until ``replay_historical_events()`` completes + successfully. If replay raises, no partially-initialized runner + is stored in the registry and the exception propagates to the caller. + """ runner = self._runners.get(conversation_id) if runner is None: runner = self._factory.create( @@ -48,6 +55,13 @@ def get_or_create(self, conversation_id: uuid.UUID) -> ConversationRunner: message_pump=self._message_pump, notification_callback=self._notification_callback, ) + + # Replay historical events for newly created runners (resume / switch). + # This is a no-op for brand-new conversations with no events. + # IMPORTANT: Replay MUST complete before caching — if replay raises, + # the partially-initialized runner must not pollute the registry. + runner.replay_historical_events() + self._runners[conversation_id] = runner if runner.conversation is not None: diff --git a/openhands_cli/tui/textual_app.py b/openhands_cli/tui/textual_app.py index 551ed7771..2fe27373d 100644 --- a/openhands_cli/tui/textual_app.py +++ b/openhands_cli/tui/textual_app.py @@ -73,6 +73,7 @@ RequestSwitchConfirmation, SendMessage, ) +from openhands_cli.tui.core.events import LoadOlderEvents from openhands_cli.tui.core.conversation_manager import SwitchConfirmed from openhands_cli.tui.core.runner_factory import RunnerFactory from openhands_cli.tui.modals import SettingsScreen @@ -192,6 +193,10 @@ def __init__( ) self.conversation_state.conversation_id = initial_conversation_id + # BUG-001: Track whether this is a resume session so _initialize_main_ui() + # can eagerly create the runner to trigger replay_historical_events(). + self._is_resume = resume_conversation_id is not None + self.conversation_dir = BaseConversation.get_persistence_dir( get_conversations_dir(), initial_conversation_id ) @@ -456,6 +461,16 @@ def _initialize_main_ui(self) -> None: # in SplashContent via data_bind self.conversation_state.set_loaded_resources(loaded_resources) + # BUG-001: For --resume sessions, eagerly create the runner so that + # replay_historical_events() fires before the user sends their first + # message. ORDERING CONSTRAINT: must be called after set_loaded_resources() + # (widgets mounted) and before _process_queued_inputs() so the UI is + # ready to render replayed events. + if self._is_resume and self.conversation_state.conversation_id is not None: + self.conversation_manager.ensure_runner( + self.conversation_state.conversation_id + ) + # Process any queued inputs self._process_queued_inputs() @@ -506,7 +521,13 @@ def on_key(self, event: events.Key) -> None: losing typing context) - When Tab is pressed from input area, focus the most recent (last) cell instead of the first one (unless autocomplete is showing) + - PageUp requests loading older replay history when available """ + if event.key == "pageup": + self.conversation_manager.post_message(LoadOlderEvents()) + event.stop() + event.prevent_default() + return # Handle Tab from input area - focus most recent cell # Skip if autocomplete dropdown is visible (Tab is used for selection) if event.key == "tab" and isinstance(self.focused, Input | TextArea): @@ -541,8 +562,14 @@ def on_mouse_up(self, _event: events.MouseUp) -> None: When the user finishes selecting text by releasing the mouse button, this method checks if there's selected text and copies it to clipboard. """ - # Get selected text from the screen - selected_text = self.screen.get_selected_text() + # Get selected text from the screen. + # BUG-002: Wrap in try/except — during rapid clicking, Toast widgets in + # transient lifecycle states cause KeyError in Textual's style resolution. + # See upstream Textual issue #5646. + try: + selected_text = self.screen.get_selected_text() + except Exception: + return if not selected_text: return diff --git a/openhands_cli/tui/widgets/richlog_visualizer.py b/openhands_cli/tui/widgets/richlog_visualizer.py index 817e82e2c..55e4ef71c 100644 --- a/openhands_cli/tui/widgets/richlog_visualizer.py +++ b/openhands_cli/tui/widgets/richlog_visualizer.py @@ -5,11 +5,13 @@ import re import threading -from typing import TYPE_CHECKING +from collections import deque +from typing import TYPE_CHECKING, Sequence from rich.text import Text -from textual.widgets import Markdown +from textual.widgets import Markdown, Static +from openhands.sdk import TextContent from openhands.sdk.conversation.visualizer.base import ConversationVisualizerBase from openhands.sdk.event import ( ActionEvent, @@ -32,6 +34,7 @@ from openhands_cli.shared.delegate_formatter import format_delegate_title from openhands_cli.stores import CliSettings from openhands_cli.theme import OPENHANDS_THEME +from openhands_cli.tui.core.events import LoadOlderEvents from openhands_cli.tui.widgets.collapsible import ( Collapsible, ) @@ -117,6 +120,17 @@ def __init__( # Track pending actions by tool_call_id for action-observation pairing self._pending_actions: dict[str, tuple[ActionEvent, Collapsible]] = {} + # Replay/loading state for on-demand history loading + self._all_events: list[Event] = [] + self._loaded_start_index: int = 0 + self._banner_widget: Static | Collapsible | None = None + self._summary_text: str | None = None + self._has_condensation: bool = False + self._condensed_count: int | None = None + self._prepend_in_progress: bool = False + self._live_event_buffer: deque[Event] = deque() + self._max_viewable_widgets: int = 2000 + @property def cli_settings(self) -> CliSettings: if self._cli_settings is None: @@ -265,6 +279,10 @@ def _get_agent_model(self) -> str | None: def on_event(self, event: Event) -> None: """Main event handler that creates widgets for events.""" + if self._prepend_in_progress: + self._live_event_buffer.append(event) + return + # Check for TaskTrackerObservation to update/open the plan panel if isinstance(event, ObservationEvent) and isinstance( event.observation, TaskTrackerObservation @@ -855,3 +873,269 @@ def _create_event_collapsible(self, event: Event) -> Collapsible | None: return self._make_collapsible( content_string, f"{agent_prefix}{title}", event ) + + def _render_event_for_replay(self, event: Event) -> None: + """Render a single historical event without replay-end scrolling.""" + # Render user messages that on_event normally skips + if ( + isinstance(event, MessageEvent) + and event.llm_message + and event.llm_message.role == "user" + and not event.sender + ): + text_parts = [] + for item in event.llm_message.content: + if isinstance(item, TextContent): + text_parts.append(item.text) + content_text = "\n".join(text_parts) + if content_text.strip(): + widget = Static(f"> {content_text}", classes="user-message", markup=False) + self._add_widget_to_ui(widget) + return + + # Handle observation pairing (same as on_event) + if isinstance(event, ObservationEvent | UserRejectObservation | AgentErrorEvent): + if self._handle_observation_event(event): + return + # Cross-boundary: action was outside replay window + widget = self._create_cross_boundary_observation_widget(event) + if widget: + self._add_widget_to_ui(widget) + return + + widget = self._create_event_widget(event) + if widget: + self._add_widget_to_ui(widget) + + def _create_cross_boundary_observation_widget( + self, event: ObservationEvent | UserRejectObservation | AgentErrorEvent + ) -> "Widget | None": + """Create widget for an observation whose action is outside the replay window.""" + title = self._extract_meaningful_title(event, "Observation") + title = f"(action not shown) {title}" + content = event.visualize if hasattr(event, "visualize") else str(event) + content_string = self._escape_rich_markup(str(content)) + return self._make_collapsible(content_string, title, event) + + def replay_events(self, events: list[Event]) -> None: + """Replay historical events into the UI without triggering side effects. + + Unlike on_event(), this skips critic handling, telemetry, and plan panel + refreshes. User messages (normally rendered separately by + UserMessageController) are rendered inline so the full conversation + history appears. + + Must be called from the main thread. + """ + for event in events: + self._render_event_for_replay(event) + + # Scroll to the end after replaying all events + if events: + self._container.scroll_end(animate=False) + + def set_replay_context( + self, + all_events: Sequence[Event], + loaded_start_index: int, + summary_text: str | None, + has_condensation: bool, + condensed_count: int | None, + ) -> None: + """Set full replay context for on-demand older-event loading.""" + self._all_events = list(all_events) + self._loaded_start_index = loaded_start_index + self._summary_text = summary_text + self._has_condensation = has_condensation + self._condensed_count = condensed_count + + def load_older_events(self) -> int: + """Load one older page above the current loaded tail while preserving viewport.""" + if self._visible_widget_count() >= self._max_viewable_widgets: + self._update_banner_for_loaded_state() + return 0 + + if self._loaded_start_index <= 0: + return 0 + + page_size = self.cli_settings.replay_window_size + start = max(0, self._loaded_start_index - page_size) + older_events = self._all_events[start : self._loaded_start_index] + if not older_events: + return 0 + + self._prepend_in_progress = True + try: + # Scroll stability strategy from SPEC-002: preserve + restore offset + before_scroll = float(self._container.scroll_y) + + # Render and prepend by mounting before banner (or current first child) + first_child = self._banner_widget if self._banner_widget is not None else ( + self._container.children[0] if self._container.children else None + ) + + mounted = 0 + consumed = 0 + for event in older_events: + if self._visible_widget_count() >= self._max_viewable_widgets: + break + consumed += 1 + widget = self._create_replay_widget(event) + if widget is None: + continue + self._container.mount(widget, before=first_child) + mounted += 1 + + self._loaded_start_index -= consumed + self._update_banner_for_loaded_state() + + if mounted > 0: + # Approximate restoration strategy validated by spike + self._container.scroll_to(y=before_scroll + mounted, animate=False) + finally: + self._prepend_in_progress = False + self._flush_live_event_buffer() + + return mounted + + def _create_replay_widget(self, event: Event) -> "Widget | None": + """Create a widget for historical replay prepend operations.""" + # Keep behavior consistent with replay_events path + if ( + isinstance(event, MessageEvent) + and event.llm_message + and event.llm_message.role == "user" + and not event.sender + ): + text_parts = [ + item.text + for item in event.llm_message.content + if isinstance(item, TextContent) + ] + content_text = "\n".join(text_parts) + if content_text.strip(): + return Static(f"> {content_text}", classes="user-message", markup=False) + return None + return self._create_event_widget(event) + + def _visible_widget_count(self) -> int: + """Count currently mounted history widgets for cap enforcement.""" + return sum( + 1 + for w in self._container.children + if w.id not in {"splash_content", "replay-summary-banner"} + ) + + def _remove_banner_widget(self) -> None: + """Best-effort banner removal that is safe in unit tests without active app.""" + if self._banner_widget is None: + return + try: + if self._banner_widget.is_attached: + self._banner_widget.remove() + except Exception: + pass + self._banner_widget = None + + def _update_banner_for_loaded_state(self) -> None: + """Update banner text and terminal message after each older-page load.""" + hidden_count = self._loaded_start_index + at_cap = self._visible_widget_count() >= self._max_viewable_widgets + + if self._banner_widget is None: + return + + if at_cap: + terminal = Static( + "Maximum viewable history reached.", + classes="replay-summary", + id="replay-summary-banner", + ) + self._remove_banner_widget() + self._banner_widget = terminal + self._container.mount( + terminal, + before=(self._container.children[0] if self._container.children else None), + ) + return + + if hidden_count <= 0: + self._remove_banner_widget() + return + + if isinstance(self._banner_widget, Static): + if self._has_condensation: + condensed_total = self._condensed_count if self._condensed_count is not None else hidden_count + if self._summary_text is None: + self._banner_widget.update( + f"Prior context condensed ({condensed_total} events). Summary not available." + ) + else: + self._banner_widget.update( + f"Prior context: {condensed_total} events condensed" + ) + else: + self._banner_widget.update( + f"{hidden_count:,} earlier events not shown. Press [Page Up] to load more." + ) + elif isinstance(self._banner_widget, Collapsible): + condensed_total = self._condensed_count if self._condensed_count is not None else hidden_count + self._banner_widget.update_title( + f"Prior context: {condensed_total} events condensed" + ) + + def _flush_live_event_buffer(self) -> None: + """Flush live events buffered during prepend operation.""" + while self._live_event_buffer: + event = self._live_event_buffer.popleft() + self.on_event(event) + + def replay_with_summary( + self, + summary_text: str | None, + tail_events: list[Event], + total_count: int, + hidden_count: int, + has_condensation: bool, + condensed_count: int | None, + ) -> None: + """Replay tail events with a top summary/count banner when history is hidden.""" + if hidden_count > 0: + if has_condensation and summary_text is not None: + condensed_total = ( + condensed_count if condensed_count is not None else hidden_count + ) + title = f"Prior context: {condensed_total} events condensed" + banner = Collapsible( + self._escape_rich_markup(summary_text), + title=title, + collapsed=True, + symbol_color="#727987", + classes="replay-summary", + id="replay-summary-banner", + ) + self._banner_widget = banner + self._add_widget_to_ui(banner) + elif has_condensation: + condensed_total = ( + condensed_count if condensed_count is not None else hidden_count + ) + banner = Static( + f"Prior context condensed ({condensed_total} events). Summary not available.", + classes="replay-summary", + id="replay-summary-banner", + ) + self._banner_widget = banner + self._add_widget_to_ui(banner) + else: + banner = Static( + f"{hidden_count:,} earlier events not shown. Press [Page Up] to load more.", + classes="replay-summary", + id="replay-summary-banner", + ) + self._banner_widget = banner + self._add_widget_to_ui(banner) + else: + self._banner_widget = None + + self.replay_events(tail_events) diff --git a/pyproject.toml b/pyproject.toml index c388026d4..6640dd40c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,9 @@ extend-select = ["B006", "B008", "B039", "RUF012"] [tool.ruff.lint.per-file-ignores] # Test files often have unused arguments (fixtures, mocks, interface implementations) "tests/**/*.py" = ["ARG"] +# BUG-005: entrypoint places a warnings.filterwarnings() call between stdlib and +# third-party imports to suppress RequestsDependencyWarning before it fires. +"openhands_cli/entrypoint.py" = ["E402", "I001"] diff --git a/scripts/scroll_prepend_spike.py b/scripts/scroll_prepend_spike.py new file mode 100644 index 000000000..d5716974b --- /dev/null +++ b/scripts/scroll_prepend_spike.py @@ -0,0 +1,84 @@ +"""SPEC-002 spike: validate Textual VerticalScroll prepend + scroll restoration behavior. + +Usage: + uv run python scripts/scroll_prepend_spike.py +""" + +import asyncio +import json +import time + +from textual.app import App, ComposeResult +from textual.containers import VerticalScroll +from textual.widgets import Static + + +class SpikeApp(App): + def compose(self) -> ComposeResult: + with VerticalScroll(id="scroll"): + for i in range(600): + yield Static(f"row-{i}") + + +async def run_spike() -> dict[str, float | bool | str]: + out: dict[str, float | bool | str] = {} + + app = SpikeApp() + async with app.run_test() as pilot: + await pilot.pause() + container = app.query_one("#scroll", VerticalScroll) + + # Q1: scroll_y semantics + out["q1_scroll_y_type"] = type(container.scroll_y).__name__ + out["q1_scroll_y_initial"] = float(container.scroll_y) + + # Q2: prepend behavior (does it auto-adjust?) + container.scroll_to(y=220, animate=False) + await pilot.pause() + y_before_prepend = float(container.scroll_y) + first = container.children[0] if container.children else None + container.mount(Static("prep-single"), before=first) + await pilot.pause() + y_after_prepend = float(container.scroll_y) + out["q2_scroll_y_before_prepend"] = y_before_prepend + out["q2_scroll_y_after_prepend"] = y_after_prepend + out["q2_auto_adjusted"] = y_after_prepend != y_before_prepend + + # Q3: restoration via scroll_to + target = y_before_prepend + 120 + container.scroll_to(y=target, animate=False) + await pilot.pause() + y_after_restore = float(container.scroll_y) + out["q3_scroll_to_target"] = float(target) + out["q3_scroll_y_after_scroll_to"] = y_after_restore + out["q3_restore_success"] = abs(y_after_restore - target) < 1e-6 + + # Q4: latency for prepend + restore cycle (200 widgets) + container.scroll_to(y=220, animate=False) + await pilot.pause() + baseline = float(container.scroll_y) + t0 = time.perf_counter() + for i in range(200): + first = container.children[0] if container.children else None + container.mount(Static(f"prep-{i}"), before=first) + await pilot.pause() + container.scroll_to(y=baseline + 200, animate=False) + await pilot.pause() + t1 = time.perf_counter() + out["q4_latency_ms_prepend_and_restore_200"] = round((t1 - t0) * 1000, 3) + out["q4_scroll_y_after_restore_200"] = float(container.scroll_y) + + # Q5: batching support + out["q5_container_has_batch_update"] = hasattr(container, "batch_update") + out["q5_app_has_batch_update"] = hasattr(app, "batch_update") + + return out + + +def main() -> None: + result = asyncio.run(run_spike()) + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py new file mode 100644 index 000000000..03dd4df6f --- /dev/null +++ b/tests/test_entrypoint.py @@ -0,0 +1,74 @@ +"""Tests for entrypoint handle_resume_logic keyword normalization (BUG-003).""" + +import argparse +from unittest.mock import MagicMock, patch + +from openhands_cli.entrypoint import handle_resume_logic + + +def _make_args(resume: str | None = None, last: bool = False) -> argparse.Namespace: + """Create a minimal Namespace matching the CLI's argparse output.""" + return argparse.Namespace(resume=resume, last=last) + + +class TestHandleResumeLogicNormalization: + """Test that `--resume last` (case-insensitive) normalizes to the --last path.""" + + @patch("openhands_cli.conversations.store.local.LocalFileStore") + def test_resume_last_lowercase(self, mock_store_cls: MagicMock) -> None: + """--resume last → sets args.last=True and delegates to --last code path.""" + mock_store = mock_store_cls.return_value + mock_conv = MagicMock() + mock_conv.id = "abc-123" + mock_store.list_conversations.return_value = [mock_conv] + + args = _make_args(resume="last", last=False) + result = handle_resume_logic(args) + + assert args.last is True + assert result == "abc-123" + mock_store.list_conversations.assert_called_once_with(limit=1) + + @patch("openhands_cli.conversations.store.local.LocalFileStore") + def test_resume_LAST_uppercase(self, mock_store_cls: MagicMock) -> None: + """--resume LAST → case-insensitive normalization.""" + mock_store = mock_store_cls.return_value + mock_conv = MagicMock() + mock_conv.id = "abc-123" + mock_store.list_conversations.return_value = [mock_conv] + + args = _make_args(resume="LAST", last=False) + result = handle_resume_logic(args) + + assert args.last is True + assert result == "abc-123" + + @patch("openhands_cli.conversations.store.local.LocalFileStore") + def test_resume_Last_mixed_case(self, mock_store_cls: MagicMock) -> None: + """--resume Last → mixed-case normalization.""" + mock_store = mock_store_cls.return_value + mock_conv = MagicMock() + mock_conv.id = "abc-123" + mock_store.list_conversations.return_value = [mock_conv] + + args = _make_args(resume="Last", last=False) + result = handle_resume_logic(args) + + assert args.last is True + assert result == "abc-123" + + def test_resume_valid_uuid_unchanged(self) -> None: + """--resume → passed through unchanged, args.last stays False.""" + test_uuid = "550e8400-e29b-41d4-a716-446655440000" + args = _make_args(resume=test_uuid, last=False) + result = handle_resume_logic(args) + + assert args.last is False + assert result == test_uuid + + def test_resume_none_new_conversation(self) -> None: + """No --resume flag → returns None (new conversation).""" + args = _make_args(resume=None, last=False) + result = handle_resume_logic(args) + + assert result is None diff --git a/tests/tui/core/test_conversation_manager.py b/tests/tui/core/test_conversation_manager.py new file mode 100644 index 000000000..c63fd0c04 --- /dev/null +++ b/tests/tui/core/test_conversation_manager.py @@ -0,0 +1,60 @@ +"""Tests for ConversationManager.ensure_runner() (BUG-001).""" + +import uuid +from unittest.mock import MagicMock + +from openhands_cli.tui.core.conversation_manager import ConversationManager +from openhands_cli.tui.core.events import LoadOlderEvents + + +class TestLoadOlderEvents: + """Verify load-older message delegates to current runner.""" + + def test_load_older_events_delegates_to_runner(self) -> None: + manager = object.__new__(ConversationManager) + runner = MagicMock() + registry = MagicMock() + registry.current = runner + manager._runners = registry + + event = LoadOlderEvents() + manager._on_load_older_events(event) + + runner.load_older_events.assert_called_once_with() + + def test_load_older_events_no_runner_is_noop(self) -> None: + manager = object.__new__(ConversationManager) + registry = MagicMock() + registry.current = None + manager._runners = registry + + event = LoadOlderEvents() + manager._on_load_older_events(event) + + +class TestEnsureRunner: + """Verify ensure_runner() eagerly creates a runner via RunnerRegistry.""" + + def test_ensure_runner_calls_get_or_create(self) -> None: + """ensure_runner() delegates to _runners.get_or_create() with the ID.""" + manager = object.__new__(ConversationManager) + mock_registry = MagicMock() + manager._runners = mock_registry + + conversation_id = uuid.uuid4() + manager.ensure_runner(conversation_id) + + mock_registry.get_or_create.assert_called_once_with(conversation_id) + + def test_ensure_runner_called_exactly_once(self) -> None: + """Calling ensure_runner() twice creates only on the first call + (get_or_create is idempotent, but we verify it's called each time).""" + manager = object.__new__(ConversationManager) + mock_registry = MagicMock() + manager._runners = mock_registry + + conversation_id = uuid.uuid4() + manager.ensure_runner(conversation_id) + manager.ensure_runner(conversation_id) + + assert mock_registry.get_or_create.call_count == 2 diff --git a/tests/tui/core/test_conversation_runner_replay.py b/tests/tui/core/test_conversation_runner_replay.py new file mode 100644 index 000000000..67487663a --- /dev/null +++ b/tests/tui/core/test_conversation_runner_replay.py @@ -0,0 +1,523 @@ +"""Tests for ConversationRunner replay planning and replay_historical_events().""" + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from openhands.sdk.event.condenser import Condensation + + +class TestReplayHistoricalEvents: + """Tests for replay_historical_events and replay plan behavior.""" + + @pytest.fixture + def runner(self): + """Create a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conversation = MagicMock() + mock_conversation.state.events = [] + mock_setup.return_value = mock_conversation + + import uuid + + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=200) + + r = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + r.conversation = mock_conversation + return r + + def test_replays_all_events_in_order(self, runner): + """replay_historical_events routes non-condensed history through replay_events.""" + events = [MagicMock(name="ev1"), MagicMock(name="ev2"), MagicMock(name="ev3")] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 3 + runner.visualizer.set_replay_context.assert_called_once() + runner.visualizer.replay_events.assert_called_once_with(events) + runner.visualizer.replay_with_summary.assert_not_called() + + def test_empty_history_returns_zero(self, runner): + """No-op for empty histories.""" + runner.conversation.state.events = [] + + count = runner.replay_historical_events() + + assert count == 0 + runner.visualizer.replay_events.assert_not_called() + runner.visualizer.replay_with_summary.assert_not_called() + + def test_idempotent_second_call_returns_zero(self, runner): + """Second call does not duplicate replay after offset has been set.""" + events = [MagicMock(name="ev1")] + runner.conversation.state.events = events + + first = runner.replay_historical_events() + assert first == 1 + + second = runner.replay_historical_events() + assert second == 0 + assert runner.visualizer.replay_events.call_count == 1 + + def test_offset_set_after_replay(self, runner): + """Offset tracks how many events were replayed.""" + events = [MagicMock(name="ev1"), MagicMock(name="ev2")] + runner.conversation.state.events = events + + runner.replay_historical_events() + + assert runner._replayed_event_offset == 2 + + def test_condensation_path_uses_replay_with_summary(self, runner): + """Latest valid condensation drives summary+tail replay path.""" + old_event = MagicMock(name="old") + old_event.id = "old-1" + + condensation = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["old-1"], + summary="Condensed setup and planning", + ) + + tail_1 = MagicMock(name="tail1") + tail_1.id = "tail-1" + tail_2 = MagicMock(name="tail2") + tail_2.id = "tail-2" + + events = [old_event, condensation, tail_1, tail_2] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 2 + runner.visualizer.replay_with_summary.assert_called_once_with( + summary_text="Condensed setup and planning", + tail_events=[tail_1, tail_2], + total_count=4, + hidden_count=2, + has_condensation=True, + condensed_count=1, + ) + runner.visualizer.replay_events.assert_not_called() + + def test_condensation_failure_falls_back_safely(self, runner): + """Errors in condensation planning fall through to window/full path.""" + events = [MagicMock(name="ev1"), MagicMock(name="ev2")] + runner.conversation.state.events = events + + with patch.object( + runner, + "_build_condensation_plan", + side_effect=RuntimeError("corrupt condensation"), + ): + count = runner.replay_historical_events() + + assert count == 2 + runner.visualizer.set_replay_context.assert_called_once() + runner.visualizer.replay_events.assert_called_once_with(events) + runner.visualizer.replay_with_summary.assert_not_called() + + def test_latest_condensation_is_selected(self, runner): + """Most recent condensation event wins when multiple exist.""" + pre = MagicMock(name="pre") + pre.id = "pre-1" + + first_cond = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["pre-1"], + summary="Old summary", + ) + + middle = MagicMock(name="middle") + middle.id = "mid-1" + + second_cond = Condensation( + id="cond-2", + llm_response_id="resp-2", + forgotten_event_ids=["mid-1"], + summary="Latest summary", + ) + + tail = MagicMock(name="tail") + tail.id = "tail-1" + + events = [pre, first_cond, middle, second_cond, tail] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 1 + runner.visualizer.replay_with_summary.assert_called_once_with( + summary_text="Latest summary", + tail_events=[tail], + total_count=5, + hidden_count=4, + has_condensation=True, + condensed_count=1, + ) + + def test_condensation_none_summary_uses_summary_path(self, runner): + """Condensation with summary=None still uses condensation banner branch.""" + hidden = MagicMock(name="hidden") + hidden.id = "hidden-1" + + cond = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["hidden-1"], + summary=None, + ) + + tail = MagicMock(name="tail") + tail.id = "tail-1" + + events = [hidden, cond, tail] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 1 + runner.visualizer.replay_with_summary.assert_called_once_with( + summary_text=None, + tail_events=[tail], + total_count=3, + hidden_count=2, + has_condensation=True, + condensed_count=1, + ) + + def test_extract_tail_events_uses_deque_fallback_for_iterable(self, runner): + """Iterator-only event logs use deque fallback without requiring slicing.""" + events = [MagicMock(name=f"ev{i}") for i in range(5)] + + tail = runner._extract_tail_events(iter(events), window_size=2) + + assert tail == events[-2:] + + def test_runner_forwards_load_older_events_to_visualizer(self, runner): + """Runner delegates older-page loading to visualizer.""" + runner.visualizer.load_older_events.return_value = 7 + + loaded = runner.load_older_events() + + assert loaded == 7 + runner.visualizer.load_older_events.assert_called_once_with() + + def test_condensation_scan_fallback_for_non_sequence(self, runner): + """Non-sequence streams still select the latest condensation event.""" + pre = MagicMock(name="pre") + pre.id = "pre-1" + + first_cond = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["pre-1"], + summary="Old summary", + ) + + mid = MagicMock(name="mid") + mid.id = "mid-1" + + second_cond = Condensation( + id="cond-2", + llm_response_id="resp-2", + forgotten_event_ids=["mid-1"], + summary="Latest summary", + ) + + tail = MagicMock(name="tail") + tail.id = "tail-1" + + stream = iter([pre, first_cond, mid, second_cond, tail]) + + plan = runner._build_replay_plan(stream, total_count=5) + + assert plan.summary_text == "Latest summary" + assert plan.tail_events == [tail] + assert plan.has_condensation is True + assert plan.condensed_count == 1 + + def test_build_replay_plan_window_path_sets_hidden_count(self, runner): + """Window path computes hidden_count deterministically.""" + runner.visualizer.cli_settings.replay_window_size = 3 + events = [MagicMock(name=f"ev{i}") for i in range(6)] + + plan = runner._build_replay_plan(events, total_count=len(events)) + + assert plan.summary_text is None + assert plan.tail_events == events[-3:] + assert plan.total_count == 6 + assert plan.hidden_count == 3 + assert plan.has_condensation is False + assert plan.condensed_count is None + assert plan.loadable_events == events + assert plan.loaded_start_index == 3 + + +class TestReplayEdgeCaseBoundaries: + """Edge-case boundary tests for replay cascade (T05.05).""" + + @pytest.fixture + def runner(self): + """Create a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conversation = MagicMock() + mock_conversation.state.events = [] + mock_setup.return_value = mock_conversation + + import uuid + + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=200) + + r = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + r.conversation = mock_conversation + return r + + def test_window_size_one_returns_single_event(self, runner): + """Window=1 should replay exactly the last event.""" + runner.visualizer.cli_settings.replay_window_size = 1 + events = [MagicMock(name=f"ev{i}") for i in range(5)] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 1 + runner.visualizer.replay_with_summary.assert_called_once() + call_kwargs = runner.visualizer.replay_with_summary.call_args + assert call_kwargs[1]["tail_events"] == [events[-1]] + assert call_kwargs[1]["hidden_count"] == 4 + + def test_window_larger_than_event_count_replays_all(self, runner): + """Window > total events should replay everything with no hidden count.""" + runner.visualizer.cli_settings.replay_window_size = 500 + events = [MagicMock(name=f"ev{i}") for i in range(3)] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 3 + runner.visualizer.replay_events.assert_called_once_with(events) + runner.visualizer.replay_with_summary.assert_not_called() + + def test_zero_events_returns_zero(self, runner): + """Empty event list produces no replay at all.""" + runner.conversation.state.events = [] + + count = runner.replay_historical_events() + + assert count == 0 + runner.visualizer.replay_events.assert_not_called() + runner.visualizer.replay_with_summary.assert_not_called() + runner.visualizer.set_replay_context.assert_not_called() + + def test_exact_window_size_replays_all(self, runner): + """When total == window_size, all events replay with no hidden.""" + runner.visualizer.cli_settings.replay_window_size = 5 + events = [MagicMock(name=f"ev{i}") for i in range(5)] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 5 + runner.visualizer.replay_events.assert_called_once_with(events) + runner.visualizer.replay_with_summary.assert_not_called() + + def test_single_event_with_default_window(self, runner): + """Single event with default window produces full passthrough.""" + events = [MagicMock(name="only")] + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + assert count == 1 + runner.visualizer.replay_events.assert_called_once_with(events) + runner.visualizer.replay_with_summary.assert_not_called() + + +class TestForgottenEventIdsFiltering: + """Tests for forgotten_event_ids exclusion in _build_condensation_plan (T07.02).""" + + @pytest.fixture + def runner(self): + """Create a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conversation = MagicMock() + mock_conversation.state.events = [] + mock_setup.return_value = mock_conversation + + import uuid + + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=200) + + r = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + r.conversation = mock_conversation + return r + +class TestReplayIdempotenceWithEmptyTail: + """Regression: replay guard must work even when tail_events is empty (v0.04b).""" + + @pytest.fixture + def runner(self): + """Create a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conversation = MagicMock() + mock_conversation.state.events = [] + mock_setup.return_value = mock_conversation + + import uuid + + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=200) + + r = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + r.conversation = mock_conversation + return r + + def test_replay_idempotent_with_empty_tail_after_condensation(self, runner): + """Calling replay twice when tail_events=[] must not duplicate banners.""" + condensation = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=[], + summary="Condensed context", + ) + runner.conversation.state.events = [condensation] + + count1 = runner.replay_historical_events() + count2 = runner.replay_historical_events() + + assert count2 == 0, "Second replay call must be blocked by guard" + assert runner.visualizer.replay_with_summary.call_count == 1, ( + "replay_with_summary must be called exactly once, not duplicated" + ) + + +class TestForgottenEventIdsFiltering: + """Tests for forgotten_event_ids exclusion in _build_condensation_plan (T07.02).""" + + @pytest.fixture + def runner(self): + """Create a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conversation = MagicMock() + mock_conversation.state.events = [] + mock_setup.return_value = mock_conversation + + import uuid + + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=200) + + r = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + r.conversation = mock_conversation + return r + + def test_forgotten_tail_event_excluded_from_plan(self, runner): + """A tail event whose ID is in forgotten_event_ids is excluded from tail_events.""" + pre = MagicMock(name="pre") + pre.id = "pre-1" + + condensation = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["pre-1", "tail-forgotten"], + summary="Summary text", + ) + + tail_ok = MagicMock(name="tail_ok") + tail_ok.id = "tail-ok" + + tail_forgotten = MagicMock(name="tail_forgotten") + tail_forgotten.id = "tail-forgotten" + + events = [pre, condensation, tail_ok, tail_forgotten] + + plan = runner._build_condensation_plan(events, total_count=4) + + assert plan is not None + assert plan.tail_events == [tail_ok] + assert plan.has_condensation is True + assert plan.condensed_count == 2 + + def test_forgotten_tail_event_excluded_iterator_path(self, runner): + """Iterator path also excludes tail events in forgotten_event_ids.""" + pre = MagicMock(name="pre") + pre.id = "pre-1" + + condensation = Condensation( + id="cond-1", + llm_response_id="resp-1", + forgotten_event_ids=["pre-1", "tail-forgotten"], + summary="Summary text", + ) + + tail_ok = MagicMock(name="tail_ok") + tail_ok.id = "tail-ok" + + tail_forgotten = MagicMock(name="tail_forgotten") + tail_forgotten.id = "tail-forgotten" + + stream = iter([pre, condensation, tail_ok, tail_forgotten]) + + plan = runner._build_condensation_plan(stream, total_count=4) + + assert plan is not None + assert plan.tail_events == [tail_ok] + assert plan.condensed_count == 2 diff --git a/tests/tui/core/test_integration_load_older.py b/tests/tui/core/test_integration_load_older.py new file mode 100644 index 000000000..2b3e040d1 --- /dev/null +++ b/tests/tui/core/test_integration_load_older.py @@ -0,0 +1,164 @@ +"""Integration test: LoadOlderEvents full chain (T06.01). + +Validates the message flow: ConversationManager → ConversationRunner → ConversationVisualizer +for the on-demand older-event loading feature. +""" + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from textual.widgets import Static + +from openhands.sdk import TextContent +from openhands.sdk.event import MessageEvent +from openhands.sdk.event.base import Event +from openhands_cli.tui.core.conversation_manager import ConversationManager +from openhands_cli.tui.core.events import LoadOlderEvents +from openhands_cli.tui.widgets.richlog_visualizer import ConversationVisualizer + + +def _make_mock_event(name: str) -> Event: + ev = MagicMock(spec=Event, name=name) + ev.id = name + return ev + + +def _make_user_event(text: str) -> MessageEvent: + ev = MagicMock(spec=MessageEvent) + ev.llm_message = MagicMock() + ev.llm_message.role = "user" + ev.llm_message.content = [MagicMock(spec=TextContent, text=text)] + ev.sender = None + ev.__class__ = MessageEvent + return ev + + +def _build_visualizer() -> ConversationVisualizer: + """Build a minimal ConversationVisualizer for integration testing.""" + from textual.app import App + import collections + + app = MagicMock(spec=App) + vis = ConversationVisualizer.__new__(ConversationVisualizer) + vis._container = MagicMock() + vis._container.children = [] + vis._app = app + vis._name = None + vis._main_thread_id = 0 + vis._cli_settings = MagicMock(replay_window_size=2) + vis._pending_actions = {} + vis._all_events = [] + vis._loaded_start_index = 0 + vis._banner_widget = None + vis._summary_text = None + vis._has_condensation = False + vis._condensed_count = None + vis._prepend_in_progress = False + vis._live_event_buffer = collections.deque() + vis._max_viewable_widgets = 2000 + return vis + + +class TestIntegrationLoadOlderEvents: + """End-to-end chain test: manager → runner → visualizer.""" + + def test_full_chain_loads_older_events(self): + """Complete chain: set context → initial replay → load older via runner.""" + vis = _build_visualizer() + + # Simulate 5 events, window=2 + events = [_make_user_event(f"msg-{i}") for i in range(5)] + + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + import uuid + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + mock_conv = MagicMock() + mock_conv.state.events = events + mock_setup.return_value = mock_conv + + runner = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=vis, + ) + runner.conversation = mock_conv + + # Initial replay should use window=2 + count = runner.replay_historical_events() + assert count == 2 + + # Visualizer context should be set with all 5 events + assert vis._loaded_start_index == 3 # 5 - 2 = 3 + assert len(vis._all_events) == 5 + + # Now simulate a banner widget for the prepend path + banner = Static("banner", id="replay-summary-banner", classes="replay-summary") + vis._banner_widget = banner + vis._container.children = [banner] + + # Load older events via runner (same as manager._on_load_older_events) + loaded = runner.load_older_events() + + # Should load page_size=2 older events (indices 1,2) + assert loaded == 2 + assert vis._loaded_start_index == 1 + + def test_manager_delegates_to_runner_which_loads(self): + """ConversationManager._on_load_older_events → runner.load_older_events → visualizer.""" + manager = object.__new__(ConversationManager) + mock_runner = MagicMock() + mock_runner.load_older_events.return_value = 3 + registry = MagicMock() + registry.current = mock_runner + manager._runners = registry + + event = LoadOlderEvents() + manager._on_load_older_events(event) + + mock_runner.load_older_events.assert_called_once_with() + + def test_cap_prevents_unbounded_loading(self): + """Widget cap stops loading after limit is reached.""" + vis = _build_visualizer() + vis._max_viewable_widgets = 3 + + events = [_make_user_event(f"msg-{i}") for i in range(10)] + vis.set_replay_context( + all_events=events, + loaded_start_index=8, + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + + banner = Static("banner", id="replay-summary-banner", classes="replay-summary") + vis._banner_widget = banner + # Simulate 3 children already at cap + vis._container.children = [ + Static("a"), Static("b"), Static("c"), + ] + + loaded = vis.load_older_events() + assert loaded == 0 # At cap, nothing loads + + def test_load_returns_zero_when_all_loaded(self): + """No more events to load returns zero.""" + vis = _build_visualizer() + + events = [_make_user_event("msg-0")] + vis.set_replay_context( + all_events=events, + loaded_start_index=0, # Already at beginning + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + + loaded = vis.load_older_events() + assert loaded == 0 diff --git a/tests/tui/core/test_nfr5_guard.py b/tests/tui/core/test_nfr5_guard.py new file mode 100644 index 000000000..9d5917b84 --- /dev/null +++ b/tests/tui/core/test_nfr5_guard.py @@ -0,0 +1,34 @@ +"""NFR-5 guard: forbid list(state.events) materialization in replay-related files. + +This test enforces the v0.04 non-functional requirement that replay code must +never eagerly materialise the full event list via ``list(state.events)`` or +``list(self.conversation.state.events)``. The windowed/lazy replay cascade +should be the only access path. +""" + +import re +from pathlib import Path + +import pytest + +# Files covered by the guard (relative to repo root) +_GUARDED_FILES = [ + "openhands_cli/tui/core/conversation_runner.py", + "openhands_cli/tui/widgets/richlog_visualizer.py", + "openhands_cli/tui/core/conversation_manager.py", +] + +_FORBIDDEN_PATTERN = re.compile(r"\blist\s*\(\s*(?:self\.conversation\.)?state\.events\s*\)") + +_REPO_ROOT = Path(__file__).resolve().parents[3] + + +@pytest.mark.parametrize("rel_path", _GUARDED_FILES, ids=lambda p: p.split("/")[-1]) +def test_no_list_state_events_materialisation(rel_path: str) -> None: + """Grep guard: replay files must not contain list(*.events) calls.""" + source = (_REPO_ROOT / rel_path).read_text() + matches = _FORBIDDEN_PATTERN.findall(source) + assert not matches, ( + f"NFR-5 violation in {rel_path}: found forbidden pattern(s) {matches}. " + "Use windowed/lazy replay instead of materialising full event list." + ) diff --git a/tests/tui/core/test_replay_scale_performance.py b/tests/tui/core/test_replay_scale_performance.py new file mode 100644 index 000000000..cf32fcb95 --- /dev/null +++ b/tests/tui/core/test_replay_scale_performance.py @@ -0,0 +1,129 @@ +"""Scale smoke tests and performance regression tests for adaptive replay (T06.02, T06.04). + +Validates: +- Replay cascade operates correctly at 1K, 10K, and 60K event scales +- Widget count remains bounded by window_size (never materialises full history) +- Performance thresholds: 1K<50ms, 10K<100ms, 60K<500ms for _build_replay_plan +""" + +import time +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from openhands.sdk.event.base import Event + + +def _make_events(n: int) -> list[Event]: + """Create n lightweight mock events.""" + events = [] + for i in range(n): + ev = MagicMock(spec=Event) + ev.id = f"ev-{i}" + ev.name = f"ev-{i}" + events.append(ev) + return events + + +def _build_runner(window_size: int = 200): + """Build a ConversationRunner with mocked dependencies.""" + with patch( + "openhands_cli.tui.core.conversation_runner.setup_conversation" + ) as mock_setup: + mock_conv = MagicMock() + mock_conv.state.events = [] + mock_setup.return_value = mock_conv + + import uuid + from openhands_cli.tui.core.conversation_runner import ConversationRunner + + visualizer = MagicMock() + visualizer.cli_settings = SimpleNamespace(replay_window_size=window_size) + + runner = ConversationRunner( + conversation_id=uuid.uuid4(), + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + visualizer=visualizer, + ) + runner.conversation = mock_conv + return runner + + +# ============================================================================ +# T06.02: Scale smoke tests +# ============================================================================ + + +class TestReplayScaleSmoke: + """Smoke tests at 1K/10K/60K event scales.""" + + @pytest.mark.parametrize("scale", [1_000, 10_000, 60_000], ids=["1K", "10K", "60K"]) + def test_replay_at_scale_bounded_widget_count(self, scale: int): + """Replay produces at most window_size events, regardless of total scale.""" + window = 200 + runner = _build_runner(window_size=window) + events = _make_events(scale) + runner.conversation.state.events = events + + count = runner.replay_historical_events() + + # Should replay exactly window_size events (since scale >> window) + assert count == window + # Visualizer should receive tail_events of length window + runner.visualizer.replay_with_summary.assert_called_once() + call_kwargs = runner.visualizer.replay_with_summary.call_args[1] + assert len(call_kwargs["tail_events"]) == window + assert call_kwargs["total_count"] == scale + assert call_kwargs["hidden_count"] == scale - window + + @pytest.mark.parametrize("scale", [1_000, 10_000, 60_000], ids=["1K", "10K", "60K"]) + def test_replay_plan_is_deterministic_at_scale(self, scale: int): + """Two consecutive plan builds produce identical results.""" + window = 200 + runner = _build_runner(window_size=window) + events = _make_events(scale) + + plan1 = runner._build_replay_plan(events, total_count=scale) + plan2 = runner._build_replay_plan(events, total_count=scale) + + assert plan1.total_count == plan2.total_count + assert plan1.hidden_count == plan2.hidden_count + assert len(plan1.tail_events) == len(plan2.tail_events) + assert plan1.has_condensation == plan2.has_condensation + + +# ============================================================================ +# T06.04: Graduated performance regression tests +# ============================================================================ + + +class TestReplayPerformanceRegression: + """Performance threshold enforcement for _build_replay_plan.""" + + @pytest.mark.parametrize( + "scale,max_ms", + [(1_000, 50), (10_000, 100), (60_000, 500)], + ids=["1K<50ms", "10K<100ms", "60K<500ms"], + ) + def test_build_replay_plan_within_threshold(self, scale: int, max_ms: int): + """_build_replay_plan completes within graduated thresholds.""" + runner = _build_runner(window_size=200) + events = _make_events(scale) + + # Warm up + runner._build_replay_plan(events, total_count=scale) + + # Measure + t0 = time.monotonic() + plan = runner._build_replay_plan(events, total_count=scale) + elapsed_ms = (time.monotonic() - t0) * 1000 + + assert elapsed_ms < max_ms, ( + f"_build_replay_plan at {scale} events took {elapsed_ms:.1f}ms " + f"(threshold: {max_ms}ms)" + ) + assert len(plan.tail_events) == 200 + assert plan.total_count == scale diff --git a/tests/tui/core/test_runner_registry.py b/tests/tui/core/test_runner_registry.py new file mode 100644 index 000000000..326793775 --- /dev/null +++ b/tests/tui/core/test_runner_registry.py @@ -0,0 +1,85 @@ +"""Tests for RunnerRegistry — replay integration.""" + +import uuid +from unittest.mock import MagicMock + +import pytest + +from openhands_cli.tui.core.runner_registry import RunnerRegistry + + +class TestRunnerRegistryReplay: + """Tests that RunnerRegistry calls replay exactly once on new runner creation.""" + + @pytest.fixture + def mock_factory(self): + factory = MagicMock() + runner = MagicMock() + runner.conversation = MagicMock() + factory.create.return_value = runner + return factory + + @pytest.fixture + def registry(self, mock_factory): + return RunnerRegistry( + factory=mock_factory, + state=MagicMock(), + message_pump=MagicMock(), + notification_callback=MagicMock(), + ) + + def test_replay_called_on_new_runner(self, registry, mock_factory): + """replay_historical_events is called once when a new runner is created.""" + cid = uuid.uuid4() + runner = registry.get_or_create(cid) + + runner.replay_historical_events.assert_called_once() + + def test_replay_not_called_on_cached_runner(self, registry, mock_factory): + """replay_historical_events is NOT called when fetching a cached runner.""" + cid = uuid.uuid4() + runner = registry.get_or_create(cid) + runner.replay_historical_events.reset_mock() + + # Second call should return cached runner without replaying + same_runner = registry.get_or_create(cid) + assert same_runner is runner + runner.replay_historical_events.assert_not_called() + + def test_replay_called_per_conversation(self, registry, mock_factory): + """Each new conversation gets its own replay call.""" + cid_a = uuid.uuid4() + cid_b = uuid.uuid4() + + # Create distinct mock runners for each conversation + runner_a = MagicMock() + runner_a.conversation = MagicMock() + runner_b = MagicMock() + runner_b.conversation = MagicMock() + mock_factory.create.side_effect = [runner_a, runner_b] + + registry.get_or_create(cid_a) + registry.get_or_create(cid_b) + + runner_a.replay_historical_events.assert_called_once() + runner_b.replay_historical_events.assert_called_once() + + def test_replay_failure_leaves_cache_empty(self, registry, mock_factory): + """If replay_historical_events raises, the runner must NOT be cached.""" + cid = uuid.uuid4() + runner = mock_factory.create.return_value + runner.replay_historical_events.side_effect = RuntimeError("replay failed") + + with pytest.raises(RuntimeError, match="replay failed"): + registry.get_or_create(cid) + + assert cid not in registry._runners + + def test_replay_failure_propagates_exception(self, registry, mock_factory): + """The exception from replay_historical_events must propagate to the caller.""" + cid = uuid.uuid4() + runner = mock_factory.create.return_value + runner.replay_historical_events.side_effect = ValueError("bad event") + + with pytest.raises(ValueError, match="bad event"): + registry.get_or_create(cid) diff --git a/tests/tui/modals/settings/test_cli_settings.py b/tests/tui/modals/settings/test_cli_settings.py index b94b13a8f..15a1979ba 100644 --- a/tests/tui/modals/settings/test_cli_settings.py +++ b/tests/tui/modals/settings/test_cli_settings.py @@ -89,6 +89,7 @@ def test_defaults(self): cfg = CliSettings() assert cfg.default_cells_expanded is False assert cfg.auto_open_plan_panel is True + assert cfg.replay_window_size == 200 assert cfg.critic.enable_critic is True assert cfg.critic.enable_iterative_refinement is False assert cfg.critic.critic_threshold == 0.6 @@ -100,6 +101,17 @@ def test_default_cells_expanded_accepts_bool(self, value: bool): cfg = CliSettings(default_cells_expanded=value) assert cfg.default_cells_expanded is value + @pytest.mark.parametrize("value", [10, 200, 1000]) + def test_replay_window_size_accepts_valid_range(self, value: int): + cfg = CliSettings(replay_window_size=value) + assert cfg.replay_window_size == value + + @pytest.mark.parametrize("value", [0, -1, 9999]) + def test_replay_window_size_rejects_invalid_range(self, value: int): + with pytest.raises(ValidationError) as exc_info: + CliSettings(replay_window_size=value) + assert "Replay window size must be between 10 and 1000" in str(exc_info.value) + @pytest.mark.parametrize( "env_value, expected", [ @@ -186,6 +198,7 @@ def test_save_writes_expected_json_format(self, tmp_path: Path): cfg = CliSettings( default_cells_expanded=False, auto_open_plan_panel=False, + replay_window_size=200, critic=CriticSettings( enable_critic=False, enable_iterative_refinement=False, @@ -202,6 +215,7 @@ def test_save_writes_expected_json_format(self, tmp_path: Path): { "default_cells_expanded": False, "auto_open_plan_panel": False, + "replay_window_size": 200, "critic": { "enable_critic": False, "enable_iterative_refinement": False, diff --git a/tests/tui/test_auto_copy.py b/tests/tui/test_auto_copy.py index c1cf7644c..e1a173dbb 100644 --- a/tests/tui/test_auto_copy.py +++ b/tests/tui/test_auto_copy.py @@ -192,3 +192,36 @@ async def test_mouse_up_pyperclip_fails_on_non_linux_shows_success( assert "Selection copied to clipboard" in call_args[0][0] assert "xclip" not in call_args[0][0] assert call_args[1]["title"] == "Auto-copy" + + @pytest.mark.asyncio + async def test_mouse_up_get_selected_text_raises_does_not_crash( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """BUG-002: KeyError from Textual widget lifecycle during rapid clicking + is caught and does not crash the application.""" + monkeypatch.setattr( + SettingsScreen, + "is_initial_setup_required", + lambda env_overrides_enabled=False: False, + ) + + app = OpenHandsApp(exit_confirmation=False) + + async with app.run_test() as pilot: + oh_app = cast(OpenHandsApp, pilot.app) + + # Mock get_selected_text to raise KeyError (Textual #5646) + oh_app.screen.get_selected_text = MagicMock( + side_effect=KeyError("toast--title") + ) + + # Mock notify to verify it is NOT called + notify_mock = MagicMock() + oh_app.notify = notify_mock + + # Simulate mouse up event — should not raise + oh_app.on_mouse_up(_create_mouse_up_event()) + + # Verify no notification (early return from except branch) + notify_mock.assert_not_called() diff --git a/tests/tui/test_textual_app.py b/tests/tui/test_textual_app.py index d34c3890e..b4aa16117 100644 --- a/tests/tui/test_textual_app.py +++ b/tests/tui/test_textual_app.py @@ -109,6 +109,30 @@ def test_action_toggle_history_calls_panel_toggle(self, monkeypatch): ) +class TestPageUpHistoryLoading: + """Tests for PageUp -> LoadOlderEvents wiring.""" + + def test_pageup_posts_load_older_message_and_stops_event(self): + app = OpenHandsApp.__new__(OpenHandsApp) + app.conversation_manager = Mock() + + event = Mock() + event.key = "pageup" + event.stop = Mock() + event.prevent_default = Mock() + event.is_printable = False + + app.on_key(event) + + app.conversation_manager.post_message.assert_called_once() + posted = app.conversation_manager.post_message.call_args[0][0] + from openhands_cli.tui.core.events import LoadOlderEvents + + assert isinstance(posted, LoadOlderEvents) + event.stop.assert_called_once() + event.prevent_default.assert_called_once() + + class TestInputAreaContainerCommands: """Tests for InputAreaContainer command methods.""" diff --git a/tests/tui/widgets/test_richlog_visualizer_replay.py b/tests/tui/widgets/test_richlog_visualizer_replay.py new file mode 100644 index 000000000..011105092 --- /dev/null +++ b/tests/tui/widgets/test_richlog_visualizer_replay.py @@ -0,0 +1,540 @@ +"""Tests for ConversationVisualizer replay rendering behavior.""" + +from unittest.mock import MagicMock, patch + +import pytest +from textual.app import App +from textual.widgets import Static + +from openhands.sdk import MessageEvent, TextContent +from openhands.sdk.event import ObservationEvent +from openhands.sdk.event.base import Event +from openhands_cli.tui.widgets.richlog_visualizer import ConversationVisualizer + + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +def mock_container(): + """Create a mock container that supports mount and scroll_end.""" + container = MagicMock() + container.is_vertical_scroll_end = False + return container + + +@pytest.fixture +def visualizer(mock_container): + """Create a ConversationVisualizer with mock app and mock container.""" + app = MagicMock(spec=App) + vis = ConversationVisualizer.__new__(ConversationVisualizer) + # Manually initialize required attributes to avoid full Textual init + vis._container = mock_container + vis._app = app + vis._name = None + vis._main_thread_id = 0 # Will not match, but replay_events doesn't check + vis._cli_settings = MagicMock(replay_window_size=2) + vis._pending_actions = {} + vis._all_events = [] + vis._loaded_start_index = 0 + vis._banner_widget = None + vis._summary_text = None + vis._has_condensation = False + vis._condensed_count = None + vis._prepend_in_progress = False + vis._live_event_buffer = __import__("collections").deque() + vis._max_viewable_widgets = 2000 + return vis + + +def _make_user_message_event(text: str) -> MessageEvent: + """Create a MessageEvent representing a user message.""" + event = MagicMock(spec=MessageEvent) + event.llm_message = MagicMock() + event.llm_message.role = "user" + event.llm_message.content = [MagicMock(spec=TextContent, text=text)] + event.sender = None + # Ensure isinstance checks work + event.__class__ = MessageEvent + return event + + +def _make_observation_event(tool_call_id: str = "tc-1") -> ObservationEvent: + """Create a mock ObservationEvent.""" + event = MagicMock(spec=ObservationEvent) + event.tool_call_id = tool_call_id + event.__class__ = ObservationEvent + return event + + +# ============================================================================ +# T-1: Single user message produces one Static widget +# ============================================================================ + + +class TestReplayRendering: + """Core rendering tests for replay_events().""" + + def test_single_user_message_produces_static_widget(self, visualizer, mock_container): + """T-1: A single user message event produces one Static widget with + CSS class 'user-message' and correct text content.""" + event = _make_user_message_event("Hello world") + + visualizer.replay_events([event]) + + assert mock_container.mount.call_count == 1 + widget = mock_container.mount.call_args[0][0] + assert isinstance(widget, Static) + assert "user-message" in widget.classes + assert "Hello world" in str(widget._Static__content) + + def test_observation_event_routes_through_handler(self, visualizer, mock_container): + """T-2: Observation events route through _handle_observation_event. + Widget order matches event order.""" + user_event = _make_user_message_event("test input") + obs_event = _make_observation_event("tc-1") + + with patch.object( + visualizer, "_handle_observation_event", return_value=True + ) as mock_handler: + visualizer.replay_events([user_event, obs_event]) + + # User message should produce a mounted widget + assert mock_container.mount.call_count == 1 + # Observation should have been routed to handler + mock_handler.assert_called_once_with(obs_event) + + def test_multiple_pairs_render_in_order(self, visualizer, mock_container): + """T-3: Multiple user/observation pairs render in correct order; + widget count matches expected.""" + events = [ + _make_user_message_event("first message"), + _make_observation_event("tc-1"), + _make_user_message_event("second message"), + _make_observation_event("tc-2"), + ] + + with patch.object( + visualizer, "_handle_observation_event", return_value=True + ): + visualizer.replay_events(events) + + # Two user messages should produce two mounted widgets + assert mock_container.mount.call_count == 2 + # Verify ordering + first_widget = mock_container.mount.call_args_list[0][0][0] + second_widget = mock_container.mount.call_args_list[1][0][0] + assert "first message" in str(first_widget._Static__content) + assert "second message" in str(second_widget._Static__content) + + +# ============================================================================ +# T-4: Scroll behavior — scroll_end called once after all events +# ============================================================================ + + +class TestReplayScrollBehavior: + """Scroll behavior tests for replay_events().""" + + def test_scroll_end_called_once_after_all_events(self, visualizer, mock_container): + """T-4: scroll_end(animate=False) is called exactly once after all events.""" + events = [ + _make_user_message_event("msg 1"), + _make_user_message_event("msg 2"), + _make_user_message_event("msg 3"), + ] + + visualizer.replay_events(events) + + mock_container.scroll_end.assert_called_once_with(animate=False) + + +class TestReplayWithSummary: + """Tests for replay_with_summary banner + tail behavior.""" + + def _make_tail_event(self, text: str) -> Event: + return _make_user_message_event(text) + + def test_replay_with_summary_renders_collapsible_banner(self, visualizer, mock_container): + tail = [self._make_tail_event("tail event")] + + visualizer.replay_with_summary( + summary_text="Earlier context summary", + tail_events=tail, + total_count=10, + hidden_count=9, + has_condensation=True, + condensed_count=7, + ) + + # Banner + one tail widget are mounted + assert mock_container.mount.call_count == 2 + banner = mock_container.mount.call_args_list[0][0][0] + assert "replay-summary" in banner.classes + assert banner.id == "replay-summary-banner" + assert "Prior context: 7 events condensed" in str(banner.title) + + def test_replay_with_summary_renders_count_banner_without_summary( + self, visualizer, mock_container + ): + tail = [self._make_tail_event("tail")] + + visualizer.replay_with_summary( + summary_text=None, + tail_events=tail, + total_count=10, + hidden_count=9, + has_condensation=False, + condensed_count=None, + ) + + assert mock_container.mount.call_count == 2 + banner = mock_container.mount.call_args_list[0][0][0] + assert isinstance(banner, Static) + assert "9 earlier events not shown" in str(banner._Static__content) + assert "replay-summary" in banner.classes + assert banner.id == "replay-summary-banner" + + def test_replay_with_summary_no_hidden_count_means_no_banner( + self, visualizer, mock_container + ): + tail = [self._make_tail_event("only")] + + visualizer.replay_with_summary( + summary_text="ignored", + tail_events=tail, + total_count=1, + hidden_count=0, + has_condensation=True, + condensed_count=1, + ) + + assert mock_container.mount.call_count == 1 + + def test_replay_with_summary_condensation_none_uses_fallback_static( + self, visualizer, mock_container + ): + tail = [self._make_tail_event("tail")] + + visualizer.replay_with_summary( + summary_text=None, + tail_events=tail, + total_count=10, + hidden_count=9, + has_condensation=True, + condensed_count=7, + ) + + assert mock_container.mount.call_count == 2 + banner = mock_container.mount.call_args_list[0][0][0] + assert isinstance(banner, Static) + assert "Prior context condensed (7 events). Summary not available." in str( + banner._Static__content + ) + + +# ============================================================================ +# T-5: Empty event list edge case +# ============================================================================ + + +class TestLoadOlderEvents: + """Tests for prepend loading behavior.""" + + def test_load_older_events_prepends_before_banner(self, visualizer, mock_container): + e1 = _make_user_message_event("older-1") + e2 = _make_user_message_event("older-2") + e3 = _make_user_message_event("tail-1") + + banner = Static("banner", id="replay-summary-banner", classes="replay-summary") + mock_container.children = [banner] + + visualizer.set_replay_context( + all_events=[e1, e2, e3], + loaded_start_index=2, + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + visualizer._banner_widget = banner + + loaded = visualizer.load_older_events() + + assert loaded == 2 + assert visualizer._loaded_start_index == 0 + mount_calls = mock_container.mount.call_args_list[:2] + assert len(mount_calls) == 2 + assert all(call.kwargs.get("before") is banner for call in mount_calls) + + def test_load_older_events_cap_stops_loading(self, visualizer, mock_container): + visualizer._max_viewable_widgets = 1 + mock_container.children = [Static("existing")] + + visualizer.set_replay_context( + all_events=[_make_user_message_event("older")], + loaded_start_index=1, + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + + loaded = visualizer.load_older_events() + + assert loaded == 0 + + def test_on_event_buffers_while_prepend_in_progress(self, visualizer): + live = _make_user_message_event("live") + visualizer._prepend_in_progress = True + + visualizer.on_event(live) + + assert len(visualizer._live_event_buffer) == 1 + + def test_update_banner_to_terminal_message_on_cap(self, visualizer, mock_container): + banner = Static("banner", id="replay-summary-banner", classes="replay-summary") + mock_container.children = [banner] + visualizer._banner_widget = banner + visualizer._loaded_start_index = 10 + visualizer._max_viewable_widgets = 1 + mock_container.children = [banner, Static("one")] + + visualizer._update_banner_for_loaded_state() + + assert isinstance(visualizer._banner_widget, Static) + assert "Maximum viewable history reached." in str(visualizer._banner_widget._Static__content) + + +class TestReplayEdgeCases: + """Edge case tests for replay_events().""" + + def test_empty_event_list_produces_no_widgets(self, visualizer, mock_container): + """T-5: An empty event list produces no widgets and no exception.""" + visualizer.replay_events([]) + + mock_container.mount.assert_not_called() + # Per implementation: scroll_end is NOT called when events list is empty + # (the `if events:` guard skips it) + mock_container.scroll_end.assert_not_called() + + def test_cross_boundary_observation_gets_indicator(self, visualizer, mock_container): + """Observation without matching action in window shows '(action not shown)'.""" + obs = _make_observation_event("tc-orphan") + # Ensure _handle_observation_event returns False (no match) + with patch.object( + visualizer, "_handle_observation_event", return_value=False + ), patch.object( + visualizer, + "_create_cross_boundary_observation_widget", + return_value=Static("(action not shown) Obs"), + ) as mock_cross: + visualizer.replay_events([obs]) + + mock_cross.assert_called_once_with(obs) + + +# ============================================================================ +# T-6, T-7, T-8: Side-effect omission regression guards +# ============================================================================ + + +class TestReplaySideEffectOmissions: + """Regression guards verifying that replay_events() intentionally omits + certain side effects that on_event() performs. + + These are negative assertions — they document that the omissions are + intentional design decisions, not bugs. + """ + + def test_critic_event_not_triggered_during_replay(self, visualizer, mock_container): + """T-6: Critic handling is intentionally omitted during replay. + + Replay renders historical events for visual display only. Critic + evaluation is a live-session concern — replaying events should + never trigger critic analysis of already-completed work. + """ + events = [_make_user_message_event("please review this")] + + with patch.object( + visualizer, "_handle_observation_event", return_value=False + ): + visualizer.replay_events(events) + + # Verify no critic-related attributes or methods were invoked. + # The replay code path has no critic references by design. + # This test guards against future changes that accidentally add them. + for call in mock_container.method_calls: + assert "critic" not in str(call).lower(), ( + "Critic side effect detected during replay — intentionally omitted" + ) + + def test_telemetry_not_triggered_during_replay(self, visualizer, mock_container): + """T-7: Telemetry calls are intentionally omitted during replay. + + Replaying historical events must not re-emit telemetry for events + that were already tracked during the original session. This prevents + double-counting and incorrect metrics. + """ + events = [_make_user_message_event("tracked message")] + + with patch( + "openhands_cli.tui.widgets.richlog_visualizer.posthog", + create=True, + ) as mock_posthog: + visualizer.replay_events(events) + + # posthog should not be called during replay + if hasattr(mock_posthog, "capture"): + mock_posthog.capture.assert_not_called() + + def test_plan_panel_not_refreshed_during_replay(self, visualizer, mock_container): + """T-8: Plan panel refresh is intentionally omitted during replay. + + During live sessions, certain events trigger plan panel updates. + During replay, the plan panel state is reconstructed separately — + replay_events() must not trigger incremental plan panel refreshes. + """ + events = [_make_user_message_event("create a plan")] + + visualizer.replay_events(events) + + # Verify no plan-panel-related calls on the app or container + for call in mock_container.method_calls: + assert "plan" not in str(call).lower(), ( + "Plan panel side effect detected during replay — intentionally omitted" + ) + + +# ============================================================================ +# T07.03: Cross-boundary observation widget content test +# ============================================================================ + + +class TestCrossBoundaryObservationWidgetContent: + """Test that _create_cross_boundary_observation_widget() produces correct content.""" + + def test_widget_title_contains_action_not_shown(self, visualizer): + """Real method call produces title with '(action not shown)' prefix (T07.03).""" + obs = _make_observation_event("tc-orphan") + obs.visualize = "Some observation output" + + widget = visualizer._create_cross_boundary_observation_widget(obs) + + assert widget is not None + assert "(action not shown)" in str(widget.title) + + +# ============================================================================ +# T07.05: _flush_live_event_buffer drain test +# ============================================================================ + + +class TestFlushLiveEventBuffer: + """Test that _flush_live_event_buffer drains buffer and re-dispatches via on_event.""" + + def test_flush_drains_buffer_and_redispatches(self, visualizer): + """Buffered events are re-dispatched via on_event after flush (T07.05).""" + live_event = _make_user_message_event("live during prepend") + + # Simulate buffering during prepend + visualizer._prepend_in_progress = True + visualizer.on_event(live_event) + assert len(visualizer._live_event_buffer) == 1 + + # End prepend and flush + visualizer._prepend_in_progress = False + + with patch.object(visualizer, "on_event") as mock_on_event: + visualizer._flush_live_event_buffer() + + assert len(visualizer._live_event_buffer) == 0 + mock_on_event.assert_called_once_with(live_event) + + +# ============================================================================ +# v0.04b Regression: _loaded_start_index consumed-vs-mounted tracking +# ============================================================================ + + +class TestLoadOlderEventsConsumedTracking: + """Regression tests for _loaded_start_index decrement using consumed counter.""" + + def test_load_older_advances_index_when_events_filtered(self, visualizer, mock_container): + """_loaded_start_index must advance by consumed count, not mounted count.""" + e1 = _make_user_message_event("older-1") + e2 = MagicMock(spec=Event) # Will produce None widget + e3 = _make_user_message_event("older-3") + + # Set page size large enough to load all 3 events in one page + visualizer._cli_settings.replay_window_size = 10 + + visualizer.set_replay_context( + all_events=[e1, e2, e3], + loaded_start_index=3, + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + # Start with empty children so _visible_widget_count returns 0 + mock_container.children = [] + + def create_with_filter(event): + if event is e2: + return None + return Static(f"widget-{id(event)}") + + with patch.object(visualizer, "_create_replay_widget", side_effect=create_with_filter): + mounted = visualizer.load_older_events() + + # consumed=3 (all 3 events processed), mounted=2 (e2 filtered) + assert visualizer._loaded_start_index == 0, ( + f"Expected 0 (3 - 3 consumed), got {visualizer._loaded_start_index}" + ) + assert mounted == 2, f"Expected 2 mounted widgets, got {mounted}" + + def test_load_older_cap_break_limits_consumed(self, visualizer, mock_container): + """When _max_viewable_widgets cap breaks loop, consumed != len(older_events).""" + e1 = _make_user_message_event("older-1") + e2 = _make_user_message_event("older-2") + e3 = _make_user_message_event("older-3") + + visualizer.set_replay_context( + all_events=[e1, e2, e3], + loaded_start_index=3, + summary_text=None, + has_condensation=False, + condensed_count=None, + ) + visualizer._max_viewable_widgets = 1 + # Start empty so first _visible_widget_count() returns 0, then after mount returns 1 + widget_count = [0] + + def mock_visible_count(): + return widget_count[0] + + def create_and_count(event): + w = Static(f"widget-{id(event)}") + return w + + original_mount = mock_container.mount + + def mount_and_track(*args, **kwargs): + original_mount(*args, **kwargs) + widget_count[0] += 1 + + mock_container.mount = MagicMock(side_effect=mount_and_track) + mock_container.children = [] + + with ( + patch.object(visualizer, "_visible_widget_count", side_effect=mock_visible_count), + patch.object(visualizer, "_create_replay_widget", side_effect=create_and_count), + ): + mounted = visualizer.load_older_events() + + # First iteration: visible=0 < max=1, consumed=1, mount → visible=1 + # Second iteration: visible=1 >= max=1 → break + assert visualizer._loaded_start_index == 2, ( + f"Expected 2 (3 - 1 consumed), got {visualizer._loaded_start_index}" + ) + assert mounted == 1, f"Expected 1 mounted widget, got {mounted}"