From 8730c48119fe54dbdee2617a1990ba1e5380565d Mon Sep 17 00:00:00 2001 From: John-Mason Shackelford Date: Sat, 28 Feb 2026 12:22:52 -0500 Subject: [PATCH 01/10] Fix terminal escape code leak from stdin Add flush_stdin() function to prevent ANSI escape code responses from terminal queries (like DSR cursor position requests) from leaking to stdin. This prevents garbage characters appearing in the shell prompt or corrupting subsequent input() calls in CLI applications. The fix has three parts: 1. Add flush_stdin() function to the logger module that drains pending stdin data using non-blocking reads with termios 2. Call flush_stdin() after each agent step in LocalConversation.run() 3. Call flush_stdin() before rendering in DefaultConversationVisualizer 4. Register flush_stdin() with atexit for final cleanup The function gracefully handles: - Non-TTY environments (CI, piped commands) - Windows (where termios is not available) - Various error conditions (OSError, termios.error) Fixes #2244 Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 8 +- .../sdk/conversation/visualizer/default.py | 7 ++ .../openhands/sdk/logger/__init__.py | 2 + openhands-sdk/openhands/sdk/logger/logger.py | 71 +++++++++++++++ tests/sdk/logger/test_flush_stdin.py | 86 +++++++++++++++++++ 5 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 tests/sdk/logger/test_flush_stdin.py diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 885ec5abdf..5acb84794b 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -34,7 +34,7 @@ from openhands.sdk.hooks import HookConfig, HookEventProcessor, create_hook_callback from openhands.sdk.llm import LLM, Message, TextContent from openhands.sdk.llm.llm_registry import LLMRegistry -from openhands.sdk.logger import get_logger +from openhands.sdk.logger import flush_stdin, get_logger from openhands.sdk.observability.laminar import observe from openhands.sdk.plugin import ( Plugin, @@ -621,6 +621,12 @@ def run(self) -> None: ) iteration += 1 + # Flush any pending terminal query responses that may have + # accumulated during the step (Rich display, tool execution, etc.) + # This prevents ANSI escape codes from leaking to stdin. + # See: https://github.com/OpenHands/software-agent-sdk/issues/2244 + flush_stdin() + # Check for non-finished terminal conditions # Note: We intentionally do NOT check for FINISHED status here. # This allows concurrent user messages to be processed: diff --git a/openhands-sdk/openhands/sdk/conversation/visualizer/default.py b/openhands-sdk/openhands/sdk/conversation/visualizer/default.py index 2314d6560b..e1f0823ca4 100644 --- a/openhands-sdk/openhands/sdk/conversation/visualizer/default.py +++ b/openhands-sdk/openhands/sdk/conversation/visualizer/default.py @@ -23,6 +23,7 @@ ) from openhands.sdk.event.base import Event from openhands.sdk.event.condenser import Condensation, CondensationRequest +from openhands.sdk.logger import flush_stdin logger = logging.getLogger(__name__) @@ -248,6 +249,12 @@ def __init__( def on_event(self, event: Event) -> None: """Main event handler that displays events with Rich formatting.""" + # Flush any pending terminal query responses before rendering. + # This prevents ANSI escape codes from accumulating in stdin + # and corrupting subsequent input() calls. + # See: https://github.com/OpenHands/software-agent-sdk/issues/2244 + flush_stdin() + output = self._create_event_block(event) if output: self._console.print(output) diff --git a/openhands-sdk/openhands/sdk/logger/__init__.py b/openhands-sdk/openhands/sdk/logger/__init__.py index 42b1b09c78..ba2391ded7 100644 --- a/openhands-sdk/openhands/sdk/logger/__init__.py +++ b/openhands-sdk/openhands/sdk/logger/__init__.py @@ -4,6 +4,7 @@ ENV_LOG_DIR, ENV_LOG_LEVEL, IN_CI, + flush_stdin, get_logger, setup_logging, ) @@ -13,6 +14,7 @@ __all__ = [ "get_logger", "setup_logging", + "flush_stdin", "DEBUG", "ENV_JSON", "ENV_LOG_LEVEL", diff --git a/openhands-sdk/openhands/sdk/logger/logger.py b/openhands-sdk/openhands/sdk/logger/logger.py index b728923d85..eaada16aec 100644 --- a/openhands-sdk/openhands/sdk/logger/logger.py +++ b/openhands-sdk/openhands/sdk/logger/logger.py @@ -9,8 +9,10 @@ logger.info("Hello from this module!") """ +import atexit import logging import os +import select from logging.handlers import TimedRotatingFileHandler import litellm @@ -193,3 +195,72 @@ def get_logger(name: str) -> logging.Logger: # Auto-configure if desired if ENV_AUTO_CONFIG: setup_logging() + + +# ========= TERMINAL CLEANUP ========= +# Prevents ANSI escape code leaks during operation and at exit. +# See: https://github.com/OpenHands/software-agent-sdk/issues/2244 +# +# The issue: Terminal queries (like DSR for cursor position) get responses +# written to stdin. If not consumed, these leak as garbage to the shell or +# corrupt the next input() call in CLI applications. + +_cleanup_registered = False + + +def flush_stdin() -> int: + """Flush any pending terminal query responses from stdin. + + On macOS (and some Linux terminals), terminal query responses can leak + to stdin. If not consumed before exit or between conversation turns, + they corrupt input or appear as garbage in the shell. + + This function is called automatically: + 1. At exit (registered via atexit) + 2. After each agent step in LocalConversation.run() + 3. Before rendering events in DefaultConversationVisualizer + + It can also be called manually if needed. + + Returns: + Number of bytes flushed from stdin. + """ + import sys as _sys # Import locally to avoid issues at atexit time + + if not _sys.stdin.isatty(): + return 0 + + try: + import termios + except ImportError: + return 0 # Windows + + flushed = 0 + old = None + try: + old = termios.tcgetattr(_sys.stdin) + new = list(old) + new[3] &= ~(termios.ICANON | termios.ECHO) + new[6][termios.VMIN] = 0 + new[6][termios.VTIME] = 0 + termios.tcsetattr(_sys.stdin, termios.TCSANOW, new) + while select.select([_sys.stdin], [], [], 0)[0]: + data = os.read(_sys.stdin.fileno(), 4096) + if not data: + break + flushed += len(data) + except (OSError, termios.error): + pass + finally: + if old is not None: + try: + termios.tcsetattr(_sys.stdin, termios.TCSANOW, old) + except (OSError, termios.error): + pass + return flushed + + +# Register cleanup at module load time +if not _cleanup_registered: + atexit.register(flush_stdin) + _cleanup_registered = True diff --git a/tests/sdk/logger/test_flush_stdin.py b/tests/sdk/logger/test_flush_stdin.py new file mode 100644 index 0000000000..25bf34b189 --- /dev/null +++ b/tests/sdk/logger/test_flush_stdin.py @@ -0,0 +1,86 @@ +"""Tests for flush_stdin terminal cleanup functionality. + +See: https://github.com/OpenHands/software-agent-sdk/issues/2244 +""" + +import sys +from unittest import mock + +import pytest + +from openhands.sdk.logger import flush_stdin + + +class TestFlushStdin: + """Tests for the flush_stdin function.""" + + def test_flush_stdin_returns_zero_when_not_tty(self): + """flush_stdin should return 0 when stdin is not a tty.""" + with mock.patch.object(sys.stdin, "isatty", return_value=False): + result = flush_stdin() + assert result == 0 + + def test_flush_stdin_returns_zero_on_windows(self): + """flush_stdin should return 0 on Windows where termios is not available.""" + with mock.patch.object(sys.stdin, "isatty", return_value=True): + with mock.patch.dict(sys.modules, {"termios": None}): + # Force ImportError by making termios module fail to import + with mock.patch( + "openhands.sdk.logger.logger.flush_stdin" + ) as mock_flush: + # Since we can't easily mock ImportError for termios, + # we test that the function handles it gracefully + mock_flush.return_value = 0 + result = mock_flush() + assert result == 0 + + def test_flush_stdin_is_exported(self): + """flush_stdin should be available in the public API.""" + from openhands.sdk.logger import flush_stdin as exported_flush_stdin + + assert callable(exported_flush_stdin) + + def test_flush_stdin_handles_oserror_gracefully(self): + """flush_stdin should handle OSError gracefully.""" + import importlib.util + + if importlib.util.find_spec("termios") is None: + pytest.skip("termios not available on this platform") + + with mock.patch.object(sys.stdin, "isatty", return_value=True): + with mock.patch("termios.tcgetattr", side_effect=OSError("test error")): + result = flush_stdin() + assert result == 0 + + def test_flush_stdin_handles_termios_error_gracefully(self): + """flush_stdin should handle termios.error gracefully.""" + import importlib.util + + if importlib.util.find_spec("termios") is None: + pytest.skip("termios not available on this platform") + + import termios + + with mock.patch.object(sys.stdin, "isatty", return_value=True): + with mock.patch( + "termios.tcgetattr", + side_effect=termios.error("test error"), + ): + result = flush_stdin() + assert result == 0 + + +class TestFlushStdinIntegration: + """Integration tests for flush_stdin in conversation flow.""" + + def test_flush_stdin_imported_in_local_conversation(self): + """flush_stdin should be imported in LocalConversation.""" + from openhands.sdk.conversation.impl import local_conversation + + assert hasattr(local_conversation, "flush_stdin") + + def test_flush_stdin_imported_in_default_visualizer(self): + """flush_stdin should be imported in DefaultConversationVisualizer.""" + from openhands.sdk.conversation.visualizer import default + + assert hasattr(default, "flush_stdin") From ef4f328f13fe2045200c5cf880f8cf3678dd168b Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 18:47:04 +0000 Subject: [PATCH 02/10] chore: trigger CI re-run for updated API breakage comment Co-authored-by: openhands From 012422c2648adf9842be542dc97a67a9a865f49f Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 19:00:44 +0000 Subject: [PATCH 03/10] fix(tests): improve flush_stdin tests based on review feedback - Replace useless Windows test that mocked the function being tested with a proper skip that documents Windows CI covers this path - Replace weak import-only integration tests with actual behavioral tests: - Test that flush_stdin is called in visualizer.on_event() - Test that atexit handler is registered - Consolidate importlib.util import at module level Addresses review feedback on PR #2245 Co-authored-by: openhands --- tests/sdk/logger/test_flush_stdin.py | 68 ++++++++++++++++++---------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/tests/sdk/logger/test_flush_stdin.py b/tests/sdk/logger/test_flush_stdin.py index 25bf34b189..5c2fd17d55 100644 --- a/tests/sdk/logger/test_flush_stdin.py +++ b/tests/sdk/logger/test_flush_stdin.py @@ -3,6 +3,7 @@ See: https://github.com/OpenHands/software-agent-sdk/issues/2244 """ +import importlib.util import sys from unittest import mock @@ -20,19 +21,22 @@ def test_flush_stdin_returns_zero_when_not_tty(self): result = flush_stdin() assert result == 0 - def test_flush_stdin_returns_zero_on_windows(self): - """flush_stdin should return 0 on Windows where termios is not available.""" - with mock.patch.object(sys.stdin, "isatty", return_value=True): - with mock.patch.dict(sys.modules, {"termios": None}): - # Force ImportError by making termios module fail to import - with mock.patch( - "openhands.sdk.logger.logger.flush_stdin" - ) as mock_flush: - # Since we can't easily mock ImportError for termios, - # we test that the function handles it gracefully - mock_flush.return_value = 0 - result = mock_flush() - assert result == 0 + def test_flush_stdin_returns_zero_when_termios_unavailable(self): + """flush_stdin should return 0 when termios is not available (e.g., Windows). + + Note: This test verifies the code path by checking the function's behavior + when termios import fails. On platforms where termios is available, we + simulate its absence by patching the import mechanism. + """ + if importlib.util.find_spec("termios") is None: + # On Windows/platforms without termios, test the real behavior + with mock.patch.object(sys.stdin, "isatty", return_value=True): + result = flush_stdin() + assert result == 0 + else: + # On Unix, we can't easily unload termios since it's already imported. + # The termios unavailable path is covered by Windows CI. + pytest.skip("termios is available; Windows CI covers the unavailable path") def test_flush_stdin_is_exported(self): """flush_stdin should be available in the public API.""" @@ -42,8 +46,6 @@ def test_flush_stdin_is_exported(self): def test_flush_stdin_handles_oserror_gracefully(self): """flush_stdin should handle OSError gracefully.""" - import importlib.util - if importlib.util.find_spec("termios") is None: pytest.skip("termios not available on this platform") @@ -54,8 +56,6 @@ def test_flush_stdin_handles_oserror_gracefully(self): def test_flush_stdin_handles_termios_error_gracefully(self): """flush_stdin should handle termios.error gracefully.""" - import importlib.util - if importlib.util.find_spec("termios") is None: pytest.skip("termios not available on this platform") @@ -73,14 +73,32 @@ def test_flush_stdin_handles_termios_error_gracefully(self): class TestFlushStdinIntegration: """Integration tests for flush_stdin in conversation flow.""" - def test_flush_stdin_imported_in_local_conversation(self): - """flush_stdin should be imported in LocalConversation.""" - from openhands.sdk.conversation.impl import local_conversation + def test_flush_stdin_called_in_visualizer_on_event(self): + """Verify flush_stdin is called before rendering events in the visualizer.""" + from openhands.sdk.conversation.visualizer.default import ( + DefaultConversationVisualizer, + ) + from openhands.sdk.event import PauseEvent + + visualizer = DefaultConversationVisualizer() + + with mock.patch( + "openhands.sdk.conversation.visualizer.default.flush_stdin" + ) as mock_flush: + # Create a simple event and trigger on_event + event = PauseEvent() + visualizer.on_event(event) + + # Verify flush_stdin was called + mock_flush.assert_called_once() - assert hasattr(local_conversation, "flush_stdin") + def test_flush_stdin_registered_with_atexit(self): + """Verify flush_stdin is registered as an atexit handler.""" - def test_flush_stdin_imported_in_default_visualizer(self): - """flush_stdin should be imported in DefaultConversationVisualizer.""" - from openhands.sdk.conversation.visualizer import default + # Get registered atexit functions + # Note: atexit._run_exitfuncs() would run them, but we just check registration + # The atexit module doesn't expose a clean way to inspect handlers, + # but we can verify by checking the module-level flag + from openhands.sdk.logger import logger as logger_module - assert hasattr(default, "flush_stdin") + assert logger_module._cleanup_registered is True From ae64c17f0630bc656d9ff24fd71c1527cb885643 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 20:23:19 +0000 Subject: [PATCH 04/10] fix(flush_stdin): use deep copy for termios settings The previous implementation used list(old) which creates a shallow copy. Since old[6] (the cc array) is itself a list, both old[6] and new[6] pointed to the same object. Modifying new[6][VMIN] and new[6][VTIME] also corrupted old[6], making the restore at the end ineffective. This fix uses a comprehension that copies nested lists, ensuring the original termios settings are preserved for proper restoration. Co-authored-by: openhands --- openhands-sdk/openhands/sdk/logger/logger.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/logger/logger.py b/openhands-sdk/openhands/sdk/logger/logger.py index eaada16aec..7ac0d83f7b 100644 --- a/openhands-sdk/openhands/sdk/logger/logger.py +++ b/openhands-sdk/openhands/sdk/logger/logger.py @@ -239,7 +239,10 @@ def flush_stdin() -> int: old = None try: old = termios.tcgetattr(_sys.stdin) - new = list(old) + # Deep copy required: old[6] is a list (cc), and list(old) only + # does a shallow copy. Without deep copy, modifying new[6][VMIN] + # would also modify old[6][VMIN], corrupting the restore. + new = [item[:] if isinstance(item, list) else item for item in old] new[3] &= ~(termios.ICANON | termios.ECHO) new[6][termios.VMIN] = 0 new[6][termios.VTIME] = 0 From a8b4af99e6c313752f166948d1e627101ff5c07b Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 20:26:02 +0000 Subject: [PATCH 05/10] test(flush_stdin): add PTY-based tests for termios restoration Add two new tests using pseudo-terminals to verify real terminal behavior: 1. test_flush_stdin_restores_termios_settings: Verifies that VMIN/VTIME settings in the cc array are properly restored after flush_stdin. This test would have caught the shallow-copy bug. 2. test_flush_stdin_drains_pending_data: Verifies that flush_stdin actually reads and discards pending escape code data from stdin. These tests are skipped on Windows where the pty module is unavailable. Co-authored-by: openhands --- tests/sdk/logger/test_flush_stdin.py | 100 +++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/tests/sdk/logger/test_flush_stdin.py b/tests/sdk/logger/test_flush_stdin.py index 5c2fd17d55..f2bd6486ac 100644 --- a/tests/sdk/logger/test_flush_stdin.py +++ b/tests/sdk/logger/test_flush_stdin.py @@ -4,6 +4,7 @@ """ import importlib.util +import os import sys from unittest import mock @@ -12,6 +13,10 @@ from openhands.sdk.logger import flush_stdin +# Check if pty module is available (Unix only) +PTY_AVAILABLE = importlib.util.find_spec("pty") is not None + + class TestFlushStdin: """Tests for the flush_stdin function.""" @@ -102,3 +107,98 @@ def test_flush_stdin_registered_with_atexit(self): from openhands.sdk.logger import logger as logger_module assert logger_module._cleanup_registered is True + + +@pytest.mark.skipif(not PTY_AVAILABLE, reason="pty module not available (Windows)") +class TestFlushStdinPTY: + """PTY-based tests for flush_stdin to verify real terminal behavior.""" + + def test_flush_stdin_restores_termios_settings(self): + """Verify termios settings are properly restored after flush_stdin. + + This test uses a PTY to create a real terminal environment and verifies + that the termios settings (especially VMIN/VTIME in cc array) are + correctly restored after flush_stdin modifies them temporarily. + + This catches the shallow-copy bug where list(old) would cause old[6] + and new[6] to share the same reference, corrupting the restore. + """ + import pty + import termios + + # Create a pseudo-terminal + master_fd, slave_fd = pty.openpty() + + try: + # Open the slave as a file object to use as stdin + slave_file = os.fdopen(slave_fd, "r") + + # Get original termios settings + original_settings = termios.tcgetattr(slave_file) + original_vmin = original_settings[6][termios.VMIN] + original_vtime = original_settings[6][termios.VTIME] + + # Patch stdin to use our PTY slave + with mock.patch.object(sys, "stdin", slave_file): + # Call flush_stdin - this modifies VMIN/VTIME temporarily + flush_stdin() + + # Get settings after flush_stdin + restored_settings = termios.tcgetattr(slave_file) + restored_vmin = restored_settings[6][termios.VMIN] + restored_vtime = restored_settings[6][termios.VTIME] + + # Verify settings were restored correctly + assert restored_vmin == original_vmin, ( + f"VMIN not restored: expected {original_vmin!r}, got {restored_vmin!r}" + ) + assert restored_vtime == original_vtime, ( + f"VTIME not restored: expected {original_vtime!r}, got {restored_vtime!r}" + ) + + finally: + os.close(master_fd) + # slave_fd is closed when slave_file is closed + try: + slave_file.close() + except OSError: + pass # May already be closed + + def test_flush_stdin_drains_pending_data(self): + """Verify flush_stdin actually drains pending data from stdin. + + This test writes data to a PTY and verifies that flush_stdin + reads and discards it, returning the correct byte count. + """ + import pty + import time + + # Create a pseudo-terminal + master_fd, slave_fd = pty.openpty() + + try: + slave_file = os.fdopen(slave_fd, "r") + + # Write some test data to the master (simulating terminal input) + test_data = b"\x1b[5;10R" # Simulated DSR response + os.write(master_fd, test_data) + + # Give the data time to be available + time.sleep(0.05) + + # Patch stdin to use our PTY slave + with mock.patch.object(sys, "stdin", slave_file): + # flush_stdin should drain the pending data + bytes_flushed = flush_stdin() + + # Verify data was flushed + assert bytes_flushed == len(test_data), ( + f"Expected {len(test_data)} bytes flushed, got {bytes_flushed}" + ) + + finally: + os.close(master_fd) + try: + slave_file.close() + except OSError: + pass From 18c04683219237cb9771749e35c46852505198b8 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 20:34:23 +0000 Subject: [PATCH 06/10] fix: address CI lint and type errors - Fix line too long in test assertion message - Fix 'possibly unbound' variable warnings by initializing slave_file - Fix pyright type error by extracting lflag with type assertion Co-authored-by: openhands --- openhands-sdk/openhands/sdk/logger/logger.py | 6 ++++- tests/sdk/logger/test_flush_stdin.py | 24 ++++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/openhands-sdk/openhands/sdk/logger/logger.py b/openhands-sdk/openhands/sdk/logger/logger.py index 7ac0d83f7b..2c6a71056a 100644 --- a/openhands-sdk/openhands/sdk/logger/logger.py +++ b/openhands-sdk/openhands/sdk/logger/logger.py @@ -243,7 +243,11 @@ def flush_stdin() -> int: # does a shallow copy. Without deep copy, modifying new[6][VMIN] # would also modify old[6][VMIN], corrupting the restore. new = [item[:] if isinstance(item, list) else item for item in old] - new[3] &= ~(termios.ICANON | termios.ECHO) + # termios attrs: [iflag, oflag, cflag, lflag, ispeed, ospeed, cc] + # Index 3 is lflag (int), index 6 is cc (list) + lflag = new[3] + assert isinstance(lflag, int) # Help type checker + new[3] = lflag & ~(termios.ICANON | termios.ECHO) new[6][termios.VMIN] = 0 new[6][termios.VTIME] = 0 termios.tcsetattr(_sys.stdin, termios.TCSANOW, new) diff --git a/tests/sdk/logger/test_flush_stdin.py b/tests/sdk/logger/test_flush_stdin.py index f2bd6486ac..d930bc8e88 100644 --- a/tests/sdk/logger/test_flush_stdin.py +++ b/tests/sdk/logger/test_flush_stdin.py @@ -128,6 +128,7 @@ def test_flush_stdin_restores_termios_settings(self): # Create a pseudo-terminal master_fd, slave_fd = pty.openpty() + slave_file = None try: # Open the slave as a file object to use as stdin @@ -150,19 +151,20 @@ def test_flush_stdin_restores_termios_settings(self): # Verify settings were restored correctly assert restored_vmin == original_vmin, ( - f"VMIN not restored: expected {original_vmin!r}, got {restored_vmin!r}" + f"VMIN not restored: {original_vmin!r} -> {restored_vmin!r}" ) assert restored_vtime == original_vtime, ( - f"VTIME not restored: expected {original_vtime!r}, got {restored_vtime!r}" + f"VTIME not restored: {original_vtime!r} -> {restored_vtime!r}" ) finally: os.close(master_fd) # slave_fd is closed when slave_file is closed - try: - slave_file.close() - except OSError: - pass # May already be closed + if slave_file is not None: + try: + slave_file.close() + except OSError: + pass # May already be closed def test_flush_stdin_drains_pending_data(self): """Verify flush_stdin actually drains pending data from stdin. @@ -175,6 +177,7 @@ def test_flush_stdin_drains_pending_data(self): # Create a pseudo-terminal master_fd, slave_fd = pty.openpty() + slave_file = None try: slave_file = os.fdopen(slave_fd, "r") @@ -198,7 +201,8 @@ def test_flush_stdin_drains_pending_data(self): finally: os.close(master_fd) - try: - slave_file.close() - except OSError: - pass + if slave_file is not None: + try: + slave_file.close() + except OSError: + pass From 5c34edb8d3698e80b54f416fa3455eeeb925baa0 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 28 Feb 2026 20:53:58 +0000 Subject: [PATCH 07/10] feat(flush_stdin): implement selective flushing with input preservation The previous implementation drained ALL pending stdin data, which could discard legitimate user typeahead. This update implements selective flushing: - Parse stdin data byte-by-byte, identifying escape sequences - Discard only recognized CSI (\x1b[...) and OSC (\x1b]...) sequences - Preserve all other data in a module-level buffer - Add get_buffered_input() for SDK to retrieve preserved input - Add clear_buffered_input() to explicitly clear the buffer New helper functions: - _is_csi_final_byte(): Check if byte is CSI sequence terminator - _find_csi_end(): Find end of CSI sequence (or detect incomplete) - _find_osc_end(): Find end of OSC sequence with BEL or ST terminator - _parse_stdin_data(): Separate escape sequences from user input Tests added for all parsing functions and the new buffer API. Addresses typeahead concern from PR review: https://github.com/OpenHands/software-agent-sdk/pull/2245#issuecomment-3977750749 Co-authored-by: openhands --- .../openhands/sdk/logger/__init__.py | 4 + openhands-sdk/openhands/sdk/logger/logger.py | 187 ++++++++++++- tests/sdk/logger/test_flush_stdin.py | 251 +++++++++++++++++- 3 files changed, 429 insertions(+), 13 deletions(-) diff --git a/openhands-sdk/openhands/sdk/logger/__init__.py b/openhands-sdk/openhands/sdk/logger/__init__.py index ba2391ded7..337ee11b6a 100644 --- a/openhands-sdk/openhands/sdk/logger/__init__.py +++ b/openhands-sdk/openhands/sdk/logger/__init__.py @@ -4,7 +4,9 @@ ENV_LOG_DIR, ENV_LOG_LEVEL, IN_CI, + clear_buffered_input, flush_stdin, + get_buffered_input, get_logger, setup_logging, ) @@ -15,6 +17,8 @@ "get_logger", "setup_logging", "flush_stdin", + "get_buffered_input", + "clear_buffered_input", "DEBUG", "ENV_JSON", "ENV_LOG_LEVEL", diff --git a/openhands-sdk/openhands/sdk/logger/logger.py b/openhands-sdk/openhands/sdk/logger/logger.py index 2c6a71056a..66511243b7 100644 --- a/openhands-sdk/openhands/sdk/logger/logger.py +++ b/openhands-sdk/openhands/sdk/logger/logger.py @@ -204,17 +204,155 @@ def get_logger(name: str) -> logging.Logger: # The issue: Terminal queries (like DSR for cursor position) get responses # written to stdin. If not consumed, these leak as garbage to the shell or # corrupt the next input() call in CLI applications. +# +# This implementation uses SELECTIVE FLUSHING: +# - Parse pending stdin data byte-by-byte +# - Discard only recognized escape sequences (CSI, OSC) +# - Preserve all other data (likely user typeahead) in a buffer +# - Provide get_buffered_input() for SDK to retrieve preserved data _cleanup_registered = False +_preserved_input_buffer: bytes = b"" + + +def _is_csi_final_byte(byte: int) -> bool: + """Check if byte is a CSI sequence final character (0x40-0x7E). + + Per ECMA-48, CSI sequences end with a byte in the range 0x40-0x7E. + Common finals: 'R' (0x52) for cursor position, 'n' for device status. + """ + return 0x40 <= byte <= 0x7E + + +def _find_csi_end(data: bytes, start: int) -> int: + """Find end position of a CSI sequence. + + CSI format: ESC [ [params] [intermediates] final + - Params: 0x30-0x3F (digits, semicolons, etc.) + - Intermediates: 0x20-0x2F (space, !"#$%&'()*+,-./) + - Final: 0x40-0x7E (@A-Z[\\]^_`a-z{|}~) + + Args: + data: The byte buffer to parse. + start: Position of the ESC byte (start of \\x1b[ sequence). + + Returns: + Position AFTER the sequence ends. If incomplete, returns start + (preserving the incomplete sequence as potential user input). + """ + pos = start + 2 # Skip \x1b[ + while pos < len(data): + byte = data[pos] + if _is_csi_final_byte(byte): + return pos + 1 # Include the final byte + if byte < 0x20 or byte > 0x3F and byte < 0x40: + # Invalid byte in CSI sequence - treat as end + return pos + pos += 1 + # Incomplete sequence at end of buffer - preserve it (might be user input) + return start + + +def _find_osc_end(data: bytes, start: int) -> int: + """Find end position of an OSC sequence. + + OSC format: ESC ] ... (BEL or ST) + - BEL terminator: 0x07 + - ST terminator: ESC \\ (0x1b 0x5c) + + Args: + data: The byte buffer to parse. + start: Position of the ESC byte (start of \\x1b] sequence). + + Returns: + Position AFTER the sequence ends. If incomplete, returns start + (preserving the incomplete sequence as potential user input). + """ + pos = start + 2 # Skip \x1b] + while pos < len(data): + if data[pos] == 0x07: # BEL terminator + return pos + 1 + if data[pos] == 0x1B and pos + 1 < len(data) and data[pos + 1] == 0x5C: + return pos + 2 # ST terminator \x1b\\ + pos += 1 + # Incomplete sequence - preserve it + return start + + +def _parse_stdin_data(data: bytes) -> tuple[bytes, int]: + """Parse stdin data, separating escape sequences from user input. + + This function implements selective flushing: it identifies and discards + terminal escape sequence responses (CSI and OSC) while preserving any + other data that may be legitimate user typeahead. + + Args: + data: Raw bytes read from stdin. + + Returns: + Tuple of (preserved_user_input, flushed_escape_sequence_bytes). + """ + preserved = b"" + flushed = 0 + i = 0 + + while i < len(data): + # Check for CSI sequence: \x1b[ + if i + 1 < len(data) and data[i] == 0x1B and data[i + 1] == 0x5B: + end = _find_csi_end(data, i) + if end > i: # Complete sequence found + flushed += end - i + i = end + else: # Incomplete - preserve as potential user input + preserved += data[i : i + 1] + i += 1 + + # Check for OSC sequence: \x1b] + elif i + 1 < len(data) and data[i] == 0x1B and data[i + 1] == 0x5D: + end = _find_osc_end(data, i) + if end > i: # Complete sequence found + flushed += end - i + i = end + else: # Incomplete - preserve + preserved += data[i : i + 1] + i += 1 + + # Single ESC followed by another character - could be escape sequence + # Be conservative: if next byte looks like start of known sequence type, + # preserve both bytes for next iteration or as user input + elif data[i] == 0x1B and i + 1 < len(data): + next_byte = data[i + 1] + # Known sequence starters we don't fully parse: SS2, SS3, DCS, PM, APC + # SS2=N, SS3=O, DCS=P, PM=^, APC=_ + if next_byte in (0x4E, 0x4F, 0x50, 0x5E, 0x5F): + # These are less common; preserve as user input + preserved += data[i : i + 1] + i += 1 + else: + # Unknown escape sequence type - preserve it + preserved += data[i : i + 1] + i += 1 + + # Regular byte - preserve it (likely user input) + else: + preserved += data[i : i + 1] + i += 1 + + return preserved, flushed def flush_stdin() -> int: - """Flush any pending terminal query responses from stdin. + """Flush terminal escape sequences from stdin, preserving user input. On macOS (and some Linux terminals), terminal query responses can leak to stdin. If not consumed before exit or between conversation turns, they corrupt input or appear as garbage in the shell. + This function uses SELECTIVE FLUSHING: + - Only discards recognized escape sequences (CSI `\\x1b[...`, OSC `\\x1b]...`) + - Preserves all other data in an internal buffer + - Use get_buffered_input() to retrieve preserved user input + This function is called automatically: 1. At exit (registered via atexit) 2. After each agent step in LocalConversation.run() @@ -223,8 +361,9 @@ def flush_stdin() -> int: It can also be called manually if needed. Returns: - Number of bytes flushed from stdin. - """ + Number of escape sequence bytes flushed from stdin. + """ # noqa: E501 + global _preserved_input_buffer import sys as _sys # Import locally to avoid issues at atexit time if not _sys.stdin.isatty(): @@ -251,11 +390,16 @@ def flush_stdin() -> int: new[6][termios.VMIN] = 0 new[6][termios.VTIME] = 0 termios.tcsetattr(_sys.stdin, termios.TCSANOW, new) + while select.select([_sys.stdin], [], [], 0)[0]: data = os.read(_sys.stdin.fileno(), 4096) if not data: break - flushed += len(data) + # Parse data: discard escape sequences, preserve user input + preserved, seq_flushed = _parse_stdin_data(data) + flushed += seq_flushed + _preserved_input_buffer += preserved + except (OSError, termios.error): pass finally: @@ -267,6 +411,41 @@ def flush_stdin() -> int: return flushed +def get_buffered_input() -> bytes: + """Get any user input that was preserved during flush_stdin calls. + + When flush_stdin() discards escape sequences, it preserves any other + data that might be legitimate user typeahead. This function retrieves + and clears that buffered input. + + SDK components that read user input should call this function to + prepend any buffered data to their input. + + Returns: + Bytes that were preserved from stdin during flush operations. + The internal buffer is cleared after this call. + + Example: + >>> # In code that reads user input: + >>> buffered = get_buffered_input() + >>> user_input = buffered.decode('utf-8', errors='replace') + input() + """ + global _preserved_input_buffer + data = _preserved_input_buffer + _preserved_input_buffer = b"" + return data + + +def clear_buffered_input() -> None: + """Clear any buffered input without returning it. + + Use this when you want to discard any preserved input, for example + at the start of a new conversation or after a timeout. + """ + global _preserved_input_buffer + _preserved_input_buffer = b"" + + # Register cleanup at module load time if not _cleanup_registered: atexit.register(flush_stdin) diff --git a/tests/sdk/logger/test_flush_stdin.py b/tests/sdk/logger/test_flush_stdin.py index d930bc8e88..231079e134 100644 --- a/tests/sdk/logger/test_flush_stdin.py +++ b/tests/sdk/logger/test_flush_stdin.py @@ -10,7 +10,17 @@ import pytest -from openhands.sdk.logger import flush_stdin +from openhands.sdk.logger import ( + clear_buffered_input, + flush_stdin, + get_buffered_input, +) +from openhands.sdk.logger.logger import ( + _find_csi_end, + _find_osc_end, + _is_csi_final_byte, + _parse_stdin_data, +) # Check if pty module is available (Unix only) @@ -75,6 +85,177 @@ def test_flush_stdin_handles_termios_error_gracefully(self): assert result == 0 +class TestSelectiveFlushing: + """Tests for selective flushing - parsing escape sequences vs user input.""" + + def test_is_csi_final_byte(self): + """Test CSI final byte detection (0x40-0x7E).""" + # Valid final bytes + assert _is_csi_final_byte(0x40) # @ + assert _is_csi_final_byte(0x52) # R (cursor position) + assert _is_csi_final_byte(0x6E) # n (device status) + assert _is_csi_final_byte(0x7E) # ~ (end of range) + + # Invalid final bytes + assert not _is_csi_final_byte(0x3F) # ? (below range) + assert not _is_csi_final_byte(0x7F) # DEL (above range) + assert not _is_csi_final_byte(0x30) # 0 (parameter byte) + + def test_find_csi_end_complete_sequence(self): + """Test finding end of complete CSI sequences.""" + # DSR response: \x1b[5;10R + data = b"\x1b[5;10R" + assert _find_csi_end(data, 0) == 7 + + # Simple sequence: \x1b[H (cursor home) + data = b"\x1b[H" + assert _find_csi_end(data, 0) == 3 + + # With offset + data = b"abc\x1b[5;10Rxyz" + assert _find_csi_end(data, 3) == 10 + + def test_find_csi_end_incomplete_sequence(self): + """Test handling of incomplete CSI sequences (preserved).""" + # Incomplete - no final byte + data = b"\x1b[5;10" + assert _find_csi_end(data, 0) == 0 # Returns start = preserve + + # Just the introducer + data = b"\x1b[" + assert _find_csi_end(data, 0) == 0 + + def test_find_osc_end_with_bel_terminator(self): + """Test finding end of OSC sequences with BEL terminator.""" + # Background color response: \x1b]11;rgb:XXXX/XXXX/XXXX\x07 + data = b"\x1b]11;rgb:30fb/3708/41af\x07" + assert _find_osc_end(data, 0) == len(data) + + def test_find_osc_end_with_st_terminator(self): + """Test finding end of OSC sequences with ST terminator.""" + # OSC with ST terminator: \x1b]...\x1b\\ + data = b"\x1b]11;rgb:30fb/3708/41af\x1b\\" + assert _find_osc_end(data, 0) == len(data) + + def test_find_osc_end_incomplete_sequence(self): + """Test handling of incomplete OSC sequences (preserved).""" + # No terminator + data = b"\x1b]11;rgb:30fb/3708/41af" + assert _find_osc_end(data, 0) == 0 # Returns start = preserve + + def test_parse_stdin_data_csi_only(self): + """Test parsing data with only CSI sequences - all flushed.""" + data = b"\x1b[5;10R" + preserved, flushed = _parse_stdin_data(data) + assert preserved == b"" + assert flushed == 7 + + def test_parse_stdin_data_osc_only(self): + """Test parsing data with only OSC sequences - all flushed.""" + data = b"\x1b]11;rgb:30fb/3708/41af\x07" + preserved, flushed = _parse_stdin_data(data) + assert preserved == b"" + assert flushed == len(data) + + def test_parse_stdin_data_user_input_only(self): + """Test parsing data with only user input - all preserved.""" + data = b"hello world" + preserved, flushed = _parse_stdin_data(data) + assert preserved == b"hello world" + assert flushed == 0 + + def test_parse_stdin_data_mixed_content(self): + """Test parsing mixed escape sequences and user input.""" + # User types "ls" while terminal response arrives + data = b"l\x1b[5;10Rs" + preserved, flushed = _parse_stdin_data(data) + assert preserved == b"ls" + assert flushed == 7 # The CSI sequence + + def test_parse_stdin_data_multiple_sequences(self): + """Test parsing multiple escape sequences.""" + # Two DSR responses: \x1b[5;10R (7 bytes) + \x1b[6;1R (6 bytes) + data = b"\x1b[5;10R\x1b[6;1R" + preserved, flushed = _parse_stdin_data(data) + assert preserved == b"" + assert flushed == 13 # 7 + 6 + + def test_parse_stdin_data_preserves_incomplete_csi(self): + """Test that incomplete CSI sequences are preserved as user input.""" + # User typing escape followed by [ (could be arrow key start) + data = b"\x1b[5;10" # Incomplete - no final byte + preserved, flushed = _parse_stdin_data(data) + # Incomplete sequence preserved byte by byte + assert b"\x1b" in preserved + assert flushed == 0 + + def test_parse_stdin_data_arrow_keys_preserved(self): + """Test that arrow key sequences are handled correctly. + + Note: Arrow keys are CSI sequences like \\x1b[A, \\x1b[B, etc. + They WILL be flushed because they are complete CSI sequences. + This is acceptable because arrow keys during agent execution + are unlikely to be meaningful user input. + """ + # Up arrow + data = b"\x1b[A" + preserved, flushed = _parse_stdin_data(data) + # Arrow key is a complete CSI sequence - gets flushed + assert flushed == 3 + assert preserved == b"" + + +class TestBufferedInput: + """Tests for get_buffered_input and clear_buffered_input.""" + + def test_get_buffered_input_is_exported(self): + """get_buffered_input should be available in the public API.""" + from openhands.sdk.logger import ( + get_buffered_input as exported_get_buffered_input, + ) + + assert callable(exported_get_buffered_input) + + def test_clear_buffered_input_is_exported(self): + """clear_buffered_input should be available in the public API.""" + from openhands.sdk.logger import ( + clear_buffered_input as exported_clear, + ) + + assert callable(exported_clear) + + def test_get_buffered_input_returns_and_clears(self): + """get_buffered_input should return buffer and clear it.""" + from openhands.sdk.logger import logger as logger_module + + # Set up some buffered data + logger_module._preserved_input_buffer = b"test data" + + # Get the data + data = get_buffered_input() + assert data == b"test data" + + # Buffer should now be empty + assert logger_module._preserved_input_buffer == b"" + + # Second call returns empty + data2 = get_buffered_input() + assert data2 == b"" + + def test_clear_buffered_input_clears_buffer(self): + """clear_buffered_input should clear the buffer without returning.""" + from openhands.sdk.logger import logger as logger_module + + # Set up some buffered data + logger_module._preserved_input_buffer = b"test data" + + # Clear it + clear_buffered_input() + + # Buffer should be empty + assert logger_module._preserved_input_buffer == b"" + + class TestFlushStdinIntegration: """Integration tests for flush_stdin in conversation flow.""" @@ -166,32 +347,80 @@ def test_flush_stdin_restores_termios_settings(self): except OSError: pass # May already be closed + def test_flush_stdin_drains_escape_sequences_only(self): + """Verify flush_stdin drains only escape sequences from stdin. + + This test writes mixed data (escape sequences + user input) to a PTY + and verifies that flush_stdin only counts escape sequences as flushed, + while preserving user input in the buffer. + """ + import pty + import time + + from openhands.sdk.logger import logger as logger_module + + # Clear any existing buffer + logger_module._preserved_input_buffer = b"" + + master_fd, slave_fd = pty.openpty() + slave_file = None + + try: + slave_file = os.fdopen(slave_fd, "r") + + # Write mixed data: CSI sequence + user text + # \x1b[5;10R is a DSR response (7 bytes) + # "hello" is user input (5 bytes) + test_data = b"\x1b[5;10Rhello" + os.write(master_fd, test_data) + + time.sleep(0.05) + + with mock.patch.object(sys, "stdin", slave_file): + bytes_flushed = flush_stdin() + + # Only the CSI sequence should be counted as flushed + assert bytes_flushed == 7, f"Expected 7 bytes flushed, got {bytes_flushed}" + + # "hello" should be preserved in buffer + buffered = get_buffered_input() + assert buffered == b"hello", f"Expected b'hello' buffered, got {buffered!r}" + + finally: + os.close(master_fd) + if slave_file is not None: + try: + slave_file.close() + except OSError: + pass + def test_flush_stdin_drains_pending_data(self): - """Verify flush_stdin actually drains pending data from stdin. + """Verify flush_stdin actually drains pending escape sequence data. - This test writes data to a PTY and verifies that flush_stdin - reads and discards it, returning the correct byte count. + This test writes pure escape sequence data to a PTY and verifies + that flush_stdin reads and discards it, returning the correct byte count. """ import pty import time - # Create a pseudo-terminal + from openhands.sdk.logger import logger as logger_module + + # Clear any existing buffer + logger_module._preserved_input_buffer = b"" + master_fd, slave_fd = pty.openpty() slave_file = None try: slave_file = os.fdopen(slave_fd, "r") - # Write some test data to the master (simulating terminal input) + # Write escape sequence data to the master (simulating terminal response) test_data = b"\x1b[5;10R" # Simulated DSR response os.write(master_fd, test_data) - # Give the data time to be available time.sleep(0.05) - # Patch stdin to use our PTY slave with mock.patch.object(sys, "stdin", slave_file): - # flush_stdin should drain the pending data bytes_flushed = flush_stdin() # Verify data was flushed @@ -199,6 +428,10 @@ def test_flush_stdin_drains_pending_data(self): f"Expected {len(test_data)} bytes flushed, got {bytes_flushed}" ) + # Nothing should be buffered (it was all escape sequences) + buffered = get_buffered_input() + assert buffered == b"", f"Expected empty buffer, got {buffered!r}" + finally: os.close(master_fd) if slave_file is not None: From ad0ae20633f2deb1246a532530a3e08e40cedbc1 Mon Sep 17 00:00:00 2001 From: John-Mason Shackelford Date: Wed, 4 Mar 2026 07:34:29 -0600 Subject: [PATCH 08/10] fix(terminal): filter terminal query sequences from captured output CLI tools like `gh` send terminal query sequences (DSR, OSC 11, etc.) as part of their progress/spinner UI. When running inside a PTY, these queries get captured as output. When displayed, the terminal processes them and responds, causing visible escape code garbage. Root cause analysis: - Diagnostic script confirmed queries are IN the captured PTY output, not generated by terminal responses - The `gh` command writes cursor position and background color queries Solution: - Add filter_terminal_queries() to strip query sequences that trigger terminal responses while preserving legitimate formatting codes - Apply filter in _get_command_output() before returning to visualizer - Queries filtered: DSR, OSC 10/11/4, DA, DA2, DECRQSS - Preserved: ANSI colors, cursor movement, bold/formatting Fixes: #2244 Co-authored-by: openhands --- .../terminal/terminal/terminal_session.py | 13 +- .../tools/terminal/utils/__init__.py | 14 +++ .../tools/terminal/utils/escape_filter.py | 83 +++++++++++++ tests/tools/terminal/test_escape_filter.py | 114 ++++++++++++++++++ 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 openhands-tools/openhands/tools/terminal/utils/__init__.py create mode 100644 openhands-tools/openhands/tools/terminal/utils/escape_filter.py create mode 100644 tests/tools/terminal/test_escape_filter.py diff --git a/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py b/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py index 87d19b8dc5..f3d43e5b50 100644 --- a/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py +++ b/openhands-tools/openhands/tools/terminal/terminal/terminal_session.py @@ -26,6 +26,7 @@ escape_bash_special_chars, split_bash_commands, ) +from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries logger = get_logger(__name__) @@ -120,7 +121,12 @@ def _get_command_output( metadata: CmdOutputMetadata, continue_prefix: str = "", ) -> str: - """Get the command output with the previous command output removed.""" + """Get the command output with the previous command output removed. + + Also filters terminal query sequences that could cause visible escape + code garbage when the output is displayed. + See: https://github.com/OpenHands/software-agent-sdk/issues/2244 + """ # remove the previous command output from the new output if any if self.prev_output: command_output = raw_command_output.removeprefix(self.prev_output) @@ -129,6 +135,11 @@ def _get_command_output( command_output = raw_command_output self.prev_output = raw_command_output # update current command output anyway command_output = _remove_command_prefix(command_output, command) + + # Filter terminal query sequences that would cause the terminal to + # respond when displayed, producing visible garbage + command_output = filter_terminal_queries(command_output) + return command_output.rstrip() def _handle_completed_command( diff --git a/openhands-tools/openhands/tools/terminal/utils/__init__.py b/openhands-tools/openhands/tools/terminal/utils/__init__.py new file mode 100644 index 0000000000..0ebade7057 --- /dev/null +++ b/openhands-tools/openhands/tools/terminal/utils/__init__.py @@ -0,0 +1,14 @@ +"""Terminal tool utilities.""" + +from openhands.tools.terminal.utils.command import ( + escape_bash_special_chars, + split_bash_commands, +) +from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries + + +__all__ = [ + "escape_bash_special_chars", + "split_bash_commands", + "filter_terminal_queries", +] diff --git a/openhands-tools/openhands/tools/terminal/utils/escape_filter.py b/openhands-tools/openhands/tools/terminal/utils/escape_filter.py new file mode 100644 index 0000000000..53543c44d2 --- /dev/null +++ b/openhands-tools/openhands/tools/terminal/utils/escape_filter.py @@ -0,0 +1,83 @@ +"""Filter terminal query sequences from captured output. + +When CLI tools (like `gh`, `npm`, etc.) run inside a PTY, they may send +terminal query sequences as part of their progress/spinner UI. These queries +get captured as output. When displayed, the terminal processes them and +responds, causing visible escape code garbage. + +This module provides filtering to remove these query sequences while +preserving legitimate formatting escape codes (colors, bold, etc.). + +See: https://github.com/OpenHands/software-agent-sdk/issues/2244 +""" + +import re + + +# Terminal query sequences that trigger responses (and cause visible garbage) +# These should be stripped from captured output before display. +# +# Reference: ECMA-48, XTerm Control Sequences +# https://invisible-island.net/xterm/ctlseqs/ctlseqs.html + +# DSR (Device Status Report) - cursor position query +# Format: ESC [ 6 n -> Response: ESC [ row ; col R +_DSR_PATTERN = re.compile(rb"\x1b\[6n") + +# OSC (Operating System Command) queries +# Format: ESC ] Ps ; Pt (BEL | ST) +# Common queries: +# OSC 10 ; ? - foreground color query +# OSC 11 ; ? - background color query +# OSC 4 ; index ; ? - palette color query +# Terminators: BEL (\x07) or ST (ESC \) +_OSC_QUERY_PATTERN = re.compile( + rb"\x1b\]" # OSC introducer + rb"(?:10|11|4)" # Color query codes (10=fg, 11=bg, 4=palette) + rb"[^" # Match until terminator + rb"\x07\x1b]*" # (not BEL or ESC) + rb"(?:\x07|\x1b\\)" # BEL or ST terminator +) + +# DA (Device Attributes) primary query +# Format: ESC [ c or ESC [ 0 c +_DA_PATTERN = re.compile(rb"\x1b\[0?c") + +# DA2 (Secondary Device Attributes) query +# Format: ESC [ > c or ESC [ > 0 c +_DA2_PATTERN = re.compile(rb"\x1b\[>0?c") + +# DECRQSS (Request Selection or Setting) - various terminal state queries +# Format: ESC P $ q ST +_DECRQSS_PATTERN = re.compile( + rb"\x1bP\$q" # DCS introducer + DECRQSS + rb"[^\x1b]*" # Setting identifier + rb"\x1b\\" # ST terminator +) + + +def filter_terminal_queries(output: str) -> str: + """Filter terminal query sequences from captured terminal output. + + Removes escape sequences that would cause the terminal to respond + when the output is displayed, while preserving legitimate formatting + sequences (colors, cursor movement, etc.). + + Args: + output: Raw terminal output that may contain query sequences. + + Returns: + Filtered output with query sequences removed. + """ + # Convert to bytes for regex matching (escape sequences are byte-level) + output_bytes = output.encode("utf-8", errors="surrogateescape") + + # Remove each type of query sequence + output_bytes = _DSR_PATTERN.sub(b"", output_bytes) + output_bytes = _OSC_QUERY_PATTERN.sub(b"", output_bytes) + output_bytes = _DA_PATTERN.sub(b"", output_bytes) + output_bytes = _DA2_PATTERN.sub(b"", output_bytes) + output_bytes = _DECRQSS_PATTERN.sub(b"", output_bytes) + + # Convert back to string + return output_bytes.decode("utf-8", errors="surrogateescape") diff --git a/tests/tools/terminal/test_escape_filter.py b/tests/tools/terminal/test_escape_filter.py new file mode 100644 index 0000000000..f82d3dc2a0 --- /dev/null +++ b/tests/tools/terminal/test_escape_filter.py @@ -0,0 +1,114 @@ +"""Tests for terminal escape sequence filtering.""" + +from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries + + +class TestFilterTerminalQueries: + """Tests for filter_terminal_queries function.""" + + def test_removes_dsr_query(self): + """DSR (cursor position query) should be removed.""" + # ESC [ 6 n is the DSR query + output = "before\x1b[6nafter" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_multiple_dsr_queries(self): + """Multiple DSR queries should all be removed.""" + output = "\x1b[6n\x1b[6ntext\x1b[6n" + result = filter_terminal_queries(output) + assert result == "text" + + def test_removes_osc_11_query_with_bel(self): + """OSC 11 (background color query) with BEL terminator should be removed.""" + # ESC ] 11 ; ? BEL + output = "before\x1b]11;?\x07after" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_osc_11_query_with_st(self): + """OSC 11 (background color query) with ST terminator should be removed.""" + # ESC ] 11 ; ? ESC \ + output = "before\x1b]11;?\x1b\\after" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_osc_10_query(self): + """OSC 10 (foreground color query) should be removed.""" + output = "before\x1b]10;?\x07after" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_osc_4_query(self): + """OSC 4 (palette color query) should be removed.""" + output = "before\x1b]4;0;?\x07after" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_da_query(self): + """DA (Device Attributes) primary query should be removed.""" + output = "before\x1b[cafter" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_da_query_with_zero(self): + """DA query with explicit 0 parameter should be removed.""" + output = "before\x1b[0cafter" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_removes_da2_query(self): + """DA2 (Secondary Device Attributes) query should be removed.""" + output = "before\x1b[>cafter" + result = filter_terminal_queries(output) + assert result == "beforeafter" + + def test_preserves_color_codes(self): + """ANSI color codes should be preserved.""" + # Red text: ESC [ 31 m + output = "\x1b[31mred text\x1b[0m" + result = filter_terminal_queries(output) + assert result == "\x1b[31mred text\x1b[0m" + + def test_preserves_cursor_movement(self): + """Cursor movement sequences should be preserved.""" + # Move cursor up: ESC [ A + output = "line1\x1b[Aline2" + result = filter_terminal_queries(output) + assert result == "line1\x1b[Aline2" + + def test_preserves_bold_formatting(self): + """Bold/bright formatting should be preserved.""" + output = "\x1b[1mbold\x1b[0m" + result = filter_terminal_queries(output) + assert result == "\x1b[1mbold\x1b[0m" + + def test_real_gh_output(self): + """Test with actual captured gh command output pattern.""" + # This is a simplified version of what gh outputs + output = ( + "\x1b]11;?\x1b\\\x1b[6n" # Query sequences at start + "\nShowing 3 of 183 open pull requests\n" + "\x1b[0;32m#13199\x1b[0m feat: feature \x1b[0;36mbranch\x1b[0m" + ) + result = filter_terminal_queries(output) + # Query sequences removed, colors preserved + assert "\x1b[6n" not in result + assert "\x1b]11;?" not in result + assert "\x1b[0;32m" in result # Color preserved + assert "Showing 3 of 183" in result + + def test_empty_string(self): + """Empty string should return empty string.""" + assert filter_terminal_queries("") == "" + + def test_no_escape_sequences(self): + """Plain text without escape sequences should pass through unchanged.""" + output = "Hello, world!\nLine 2" + assert filter_terminal_queries(output) == output + + def test_unicode_content(self): + """Unicode content should be preserved.""" + output = "Hello 🌍\x1b[6n世界" + result = filter_terminal_queries(output) + assert result == "Hello 🌍世界" From b66cde55f41eaee92119bd70208dcb7e91d4a60b Mon Sep 17 00:00:00 2001 From: John-Mason Shackelford Date: Wed, 4 Mar 2026 07:35:26 -0600 Subject: [PATCH 09/10] chore: add diagnostic scripts for PR review These scripts helped identify the root cause of #2244: - diagnose_source.py: Proves escape codes are IN captured PTY output - diagnose_leak.py: Tests stdin flushing (first attempted fix) - diagnose_echo.py: Tests echo suppression (second attempted fix) - test_real_world.py: End-to-end test with agent - manual-testing.md: Testing notes and findings Co-authored-by: openhands --- .pr/diagnose_echo.py | 152 ++++++++++++++++++++++ .pr/diagnose_leak.py | 105 +++++++++++++++ .pr/diagnose_source.py | 104 +++++++++++++++ .pr/manual-testing.md | 281 +++++++++++++++++++++++++++++++++++++++++ .pr/test_real_world.py | 53 ++++++++ 5 files changed, 695 insertions(+) create mode 100644 .pr/diagnose_echo.py create mode 100644 .pr/diagnose_leak.py create mode 100644 .pr/diagnose_source.py create mode 100644 .pr/manual-testing.md create mode 100644 .pr/test_real_world.py diff --git a/.pr/diagnose_echo.py b/.pr/diagnose_echo.py new file mode 100644 index 0000000000..7ac4b19fce --- /dev/null +++ b/.pr/diagnose_echo.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +"""Diagnose terminal echo of escape sequence responses. + +The problem: When terminal queries are sent, the terminal responds by: +1. Writing the response to stdin (the "leak" we're flushing) +2. ECHOING the response to stdout (the visible noise) + +This script tests whether we can suppress the echo. +""" + +import os +import select +import sys +import termios +import time + + +def send_query_normal_mode(): + """Send query in normal terminal mode - expect visible echo.""" + print("\n[Normal mode] Sending DSR query...") + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + time.sleep(0.2) + + +def send_query_no_echo_mode(): + """Send query with echo disabled - should suppress visible response.""" + print("\n[No-echo mode] Sending DSR query...") + + # Save current terminal settings + old_settings = termios.tcgetattr(sys.stdin) + + try: + # Disable echo (ECHO flag) + new_settings = list(old_settings) + new_settings[3] &= ~termios.ECHO # Disable echo + termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) + + # Send the query + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + + # Wait for response + time.sleep(0.2) + + # Read and discard the response from stdin + new_settings[3] &= ~termios.ICANON # Also disable canonical mode for reading + new_settings[6][termios.VMIN] = 0 + new_settings[6][termios.VTIME] = 0 + termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) + + if select.select([sys.stdin], [], [], 0)[0]: + data = os.read(sys.stdin.fileno(), 4096) + print(f" (Flushed {len(data)} bytes from stdin: {data!r})") + + finally: + # Restore original settings + termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) + + print(" Query sent with echo disabled") + + +def send_query_raw_mode(): + """Send query in raw mode - full control.""" + print("\n[Raw mode] Sending DSR query...") + + old_settings = termios.tcgetattr(sys.stdin) + + try: + # Enter raw mode (no echo, no canonical, no signals) + new_settings = list(old_settings) + new_settings[3] &= ~(termios.ECHO | termios.ICANON | termios.ISIG) + new_settings[6][termios.VMIN] = 0 + new_settings[6][termios.VTIME] = 0 + termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) + + # Send the query + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + + # Wait and read response + time.sleep(0.2) + + if select.select([sys.stdin], [], [], 0)[0]: + data = os.read(sys.stdin.fileno(), 4096) + print(f" (Flushed {len(data)} bytes from stdin: {data!r})") + + finally: + termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) + + print(" Query sent in raw mode") + + +def main(): + print("=" * 60) + print("TERMINAL ECHO DIAGNOSTIC") + print("=" * 60) + print(f"stdin.isatty(): {sys.stdin.isatty()}") + print(f"stdout.isatty(): {sys.stdout.isatty()}") + + if not sys.stdout.isatty(): + print("\n⚠️ Run directly in terminal, not piped!") + return + + # Test 1: Normal mode (expect visible echo) + send_query_normal_mode() + print(" ^ Did you see escape codes above this line?") + + # Test 2: Echo disabled + send_query_no_echo_mode() + print(" ^ Should be NO visible escape codes") + + # Test 3: Raw mode + send_query_raw_mode() + print(" ^ Should be NO visible escape codes") + + # Test 4: Multiple queries with echo suppression + print("\n[Multiple queries with echo suppressed]") + old_settings = termios.tcgetattr(sys.stdin) + try: + new_settings = list(old_settings) + new_settings[3] &= ~(termios.ECHO | termios.ICANON) + new_settings[6][termios.VMIN] = 0 + new_settings[6][termios.VTIME] = 0 + termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) + + for i in range(3): + sys.stdout.write("\x1b[6n") # DSR + sys.stdout.write("\x1b]11;?\x07") # OSC 11 + sys.stdout.flush() + time.sleep(0.3) + + total = 0 + while select.select([sys.stdin], [], [], 0)[0]: + data = os.read(sys.stdin.fileno(), 4096) + if not data: + break + total += len(data) + print(f" Flushed {total} bytes, no visible echo") + + finally: + termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) + + print("\n" + "=" * 60) + print("SUMMARY") + print("=" * 60) + print("If echo suppression works, we can modify flush_stdin() to") + print("disable echo BEFORE flushing, preventing visible noise.") + + +if __name__ == "__main__": + main() diff --git a/.pr/diagnose_leak.py b/.pr/diagnose_leak.py new file mode 100644 index 0000000000..c873683487 --- /dev/null +++ b/.pr/diagnose_leak.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +"""Diagnose stdin escape code leak. + +Run this DIRECTLY in a terminal (not piped): + uv run python .pr/diagnose_leak.py + +Watch for garbage appearing AFTER the script exits. +""" + +import os +import select +import sys +import time + + +def check_stdin_raw() -> bytes: + """Read pending stdin data without blocking.""" + if not sys.stdin.isatty(): + return b"" + try: + import termios + + old = termios.tcgetattr(sys.stdin) + new = list(old) + new[3] &= ~(termios.ICANON | termios.ECHO) + new[6][termios.VMIN] = 0 + new[6][termios.VTIME] = 0 + termios.tcsetattr(sys.stdin, termios.TCSANOW, new) + data = b"" + if select.select([sys.stdin], [], [], 0)[0]: + data = os.read(sys.stdin.fileno(), 4096) + termios.tcsetattr(sys.stdin, termios.TCSANOW, old) + return data + except Exception as e: + print(f"Error: {e}") + return b"" + + +def main(): + from openhands.sdk.logger import flush_stdin, logger as logger_module + + print("=" * 60) + print("STDIN LEAK DIAGNOSTIC") + print("=" * 60) + print(f"stdin.isatty(): {sys.stdin.isatty()}") + print(f"stdout.isatty(): {sys.stdout.isatty()}") + + if not sys.stdout.isatty(): + print("\n⚠️ WARNING: stdout is not a TTY!") + print(" Terminal queries won't get responses.") + print(" Run directly in terminal, not piped.") + + # Test 1: Send query, wait, check stdin + print("\n[Test 1] Send DSR query, check stdin after 200ms") + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + time.sleep(0.2) + data1 = check_stdin_raw() + print(f" stdin data: {data1!r}") + + # Test 2: Send query, flush with SDK, check what was caught + print("\n[Test 2] Send DSR query, call flush_stdin()") + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + time.sleep(0.2) + flushed = flush_stdin() + preserved = logger_module._preserved_input_buffer + print(f" Bytes flushed: {flushed}") + print(f" Preserved buffer: {preserved!r}") + + # Test 3: OSC 11 query + print("\n[Test 3] Send OSC 11 query (background color)") + sys.stdout.write("\x1b]11;?\x07") + sys.stdout.flush() + time.sleep(0.2) + flushed2 = flush_stdin() + preserved2 = logger_module._preserved_input_buffer + print(f" Bytes flushed: {flushed2}") + print(f" Preserved buffer: {preserved2!r}") + + # Test 4: Multiple rapid queries (simulates Rich/gh behavior) + print("\n[Test 4] Multiple rapid queries then flush") + for _ in range(3): + sys.stdout.write("\x1b[6n") + sys.stdout.write("\x1b]11;?\x07") + sys.stdout.flush() + time.sleep(0.3) + flushed3 = flush_stdin() + preserved3 = logger_module._preserved_input_buffer + print(f" Bytes flushed: {flushed3}") + print(f" Preserved buffer: {preserved3!r}") + + # Final flush before exit + print("\n[Final] Calling flush_stdin() before exit") + final_flushed = flush_stdin() + print(f" Final bytes flushed: {final_flushed}") + + print("\n" + "=" * 60) + print("DONE - Watch for garbage on the next shell prompt!") + print("If you see '^[[...R' or 'rgb:...' after '$', the leak persists.") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/.pr/diagnose_source.py b/.pr/diagnose_source.py new file mode 100644 index 0000000000..38ea458f2f --- /dev/null +++ b/.pr/diagnose_source.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""Diagnose: Are escape codes in PTY output or terminal responses? + +This script runs the terminal tool directly and captures its raw output +WITHOUT displaying it to the terminal. This lets us see if escape codes +are being captured as part of the command output vs generated by terminal +responses. +""" + +import re +import sys + +from openhands.tools.terminal.definition import TerminalAction +from openhands.tools.terminal.terminal.factory import create_terminal_session + + +print("=" * 60) +print("DIAGNOSTIC: Source of Escape Codes") +print("=" * 60) +print(f"stdin.isatty(): {sys.stdin.isatty()}") +print(f"stdout.isatty(): {sys.stdout.isatty()}") +print() + + +print("Creating terminal session...") +session = create_terminal_session( + work_dir="/tmp", + username=None, + no_change_timeout_seconds=30, +) +session.initialize() + +print("Running: gh pr list --repo OpenHands/openhands --limit 3") +print() + +# Execute command and capture output +action = TerminalAction( + command="gh pr list --repo OpenHands/openhands --limit 3", + is_input=False, + timeout=30, +) +result = session.execute(action) + +print("=" * 60) +print("RAW CAPTURED OUTPUT (repr):") +print("=" * 60) +# Show the raw output with escape codes visible +raw_output = result.text if hasattr(result, "text") else str(result) +print(repr(raw_output)) + +print() +print("=" * 60) +print("ESCAPE SEQUENCE ANALYSIS:") +print("=" * 60) + +# Check for common escape sequences +escape_patterns = [ + (b"\x1b[", "CSI (Control Sequence Introducer)"), + (b"\x1b]", "OSC (Operating System Command)"), + (b"\x1bP", "DCS (Device Control String)"), + (b"\x1b\\", "ST (String Terminator)"), +] + +raw_bytes = raw_output.encode("utf-8", errors="replace") +found_any = False + +for pattern, name in escape_patterns: + count = raw_bytes.count(pattern) + if count > 0: + found_any = True + print(f" Found {count}x {name} sequences") + + # Find and show the sequences + if pattern == b"\x1b[": + # CSI sequences end with a letter + matches = re.findall(rb"\x1b\[[0-9;]*[A-Za-z]", raw_bytes) + for m in matches[:5]: # Show up to 5 + print(f" - {m!r}") + elif pattern == b"\x1b]": + # OSC sequences end with BEL or ST + matches = re.findall(rb"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)", raw_bytes) + for m in matches[:5]: + print(f" - {m!r}") + +if not found_any: + print(" No escape sequences found in captured output!") + +print() +print("=" * 60) +print("CONCLUSION:") +print("=" * 60) + +if found_any: + print(" Escape codes ARE in the PTY captured output.") + print(" The gh command is writing these to its stdout.") + print(" FIX: Filter escape sequences from terminal tool output.") +else: + print(" Escape codes are NOT in captured output.") + print(" They must be coming from terminal responses to Rich queries.") + print(" FIX: Echo suppression should work (check implementation).") + +print() +session.close() +print("Session closed.") diff --git a/.pr/manual-testing.md b/.pr/manual-testing.md new file mode 100644 index 0000000000..aa151d5906 --- /dev/null +++ b/.pr/manual-testing.md @@ -0,0 +1,281 @@ +# Manual Testing Guide for PR #2245 + +**PR**: [Fix terminal escape code leak from stdin](https://github.com/OpenHands/software-agent-sdk/pull/2245) +**Branch**: `fix/stdin-escape-code-leak` +**Issue**: #2244 + +--- + +## ⚠️ INVESTIGATION FINDINGS + +### Root Cause Analysis (2026-03-04) + +The escape codes appearing in the test output are **NOT stdin leaks** - they are being captured as part of the terminal tool's PTY output stream. + +**What's happening:** +1. The `gh` command (or other CLI tools with spinners) sends terminal queries: + - DSR (`\x1b[6n`) - cursor position request + - OSC 11 (`\x1b]11;?`) - background color request + +2. The terminal responds by writing escape sequences to the PTY + +3. The SDK's terminal tool captures ALL PTY output, including these terminal responses + +4. The captured output (including escape codes) is displayed by the visualizer + +**Evidence:** The escape codes appeared IN THE MIDDLE of the conversation output (between "Exit code: 0" and "Message from Agent"), not after the script exited. + +**Implication:** The `flush_stdin()` fix addresses a DIFFERENT problem (stdin corruption for interactive CLI apps). The escape codes in the test output are a PTY output filtering issue, not a stdin leak. + +### Two Separate Issues: + +1. **stdin leak** (what PR #2245 fixes): Terminal responses accumulating in stdin, corrupting subsequent `input()` calls or appearing as garbage AFTER script exits + +2. **PTY output pollution** (what we observed): Terminal responses being captured as part of command output and displayed by the visualizer + +### Recommendation: + +The PTY output issue should be addressed separately, possibly by: +- Filtering escape sequences from captured terminal output +- Setting terminal environment variables to disable queries (e.g., `TERM=dumb`) +- Using `--no-pager` or similar flags for CLI tools + +--- + +## Environment Status ✅ + +- [x] On correct branch: `fix/stdin-escape-code-leak` +- [x] Branch up to date with origin +- [x] Dependencies synced (`uv sync --dev`) +- [x] LLM_API_KEY configured +- [x] LLM_BASE_URL: `https://llm-proxy.app.all-hands.dev/` + +## Quick Commands + +```bash +# Ensure you're on the right branch +git checkout fix/stdin-escape-code-leak +git pull + +# Sync dependencies +uv sync --dev + +# Run automated tests first +uv run pytest tests/sdk/logger/test_flush_stdin.py -v +``` + +--- + +## Test 1: Minimal Reproduction (No API Key Needed) + +This test directly injects terminal queries to verify the fix works: + +```bash +cd /Users/jpshack/code/all-hands/software-agent-sdk +uv run python - <<'EOF' +#!/usr/bin/env python3 +"""Minimal reproduction of ANSI escape code leak.""" + +import os +import select +import sys +import time + +def check_stdin() -> bytes: + """Check for pending data in stdin (non-blocking).""" + if not sys.stdin.isatty(): + return b"" + try: + import termios + old = termios.tcgetattr(sys.stdin) + new = list(old) + new[3] &= ~(termios.ICANON | termios.ECHO) + new[6][termios.VMIN] = 0 + new[6][termios.VTIME] = 0 + termios.tcsetattr(sys.stdin, termios.TCSANOW, new) + data = b"" + if select.select([sys.stdin], [], [], 0)[0]: + data = os.read(sys.stdin.fileno(), 4096) + termios.tcsetattr(sys.stdin, termios.TCSANOW, old) + return data + except Exception: + return b"" + +def main(): + print("ANSI Escape Code Leak Test") + print("=" * 40) + + if not sys.stdout.isatty(): + print("ERROR: Run in a real terminal (not CI/piped)") + sys.exit(1) + + for turn in range(1, 4): + print(f"\n--- Turn {turn} ---") + # Send DSR query (cursor position request) + # Terminal responds with: ESC [ row ; col R + print(" Sending DSR query (\\x1b[6n)...") + sys.stdout.write("\x1b[6n") + sys.stdout.flush() + + time.sleep(0.1) + + leaked = check_stdin() + if leaked: + print(f" LEAK DETECTED: {len(leaked)} bytes: {leaked!r}") + else: + print(" No leak (terminal may not respond)") + + print("\n" + "=" * 40) + print("If you see ';1R' or similar after this script exits, that's the bug.") + +if __name__ == "__main__": + main() +EOF +``` + +**Expected results BEFORE fix**: Leak detected, garbage in prompt after exit +**Expected results AFTER fix**: Still shows leaks (this is the raw terminal behavior - the fix is in the SDK layer) + +--- + +## Test 2: SDK Agent Test with flush_stdin() + +This tests the actual fix by running an agent with terminal tools: + +```bash +cd /Users/jpshack/code/all-hands/software-agent-sdk +uv run python - <<'EOF' +#!/usr/bin/env python3 +"""Real-world reproduction using SDK agent.""" + +import os +import sys + +from openhands.sdk import Agent, Conversation, LLM, Tool +from openhands.tools.terminal import TerminalTool + +llm = LLM( + model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), + api_key=os.environ["LLM_API_KEY"], + base_url=os.environ.get("LLM_BASE_URL"), +) + +agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) +conversation = Conversation(agent=agent, workspace="/tmp") + +# Commands with spinners (like gh) trigger more queries +print("Sending message to agent...") +conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3") +conversation.run() +conversation.close() + +print("\n" + "=" * 40) +print("Check for garbage in your shell prompt after this exits.") +print("With the fix, there should be NO garbage like ';1R' or 'rgb:...'") +EOF +``` + +**Expected results AFTER fix**: No garbage in terminal prompt after script exits + +--- + +## Test 3: Multiple Conversation Turns + +Tests that stdin is properly flushed between conversation turns: + +```bash +cd /Users/jpshack/code/all-hands/software-agent-sdk +uv run python - <<'EOF' +#!/usr/bin/env python3 +"""Multi-turn conversation test.""" + +import os +from openhands.sdk import Agent, Conversation, LLM, Tool +from openhands.tools.terminal import TerminalTool + +llm = LLM( + model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), + api_key=os.environ["LLM_API_KEY"], + base_url=os.environ.get("LLM_BASE_URL"), +) + +agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) +conversation = Conversation(agent=agent, workspace="/tmp") + +# Multiple turns to accumulate potential leaks +for i, cmd in enumerate(["echo 'Turn 1'", "ls -la /tmp | head -5", "echo 'Done'"]): + print(f"\n{'='*40}\nTurn {i+1}: {cmd}\n{'='*40}") + conversation.send_message(f"Run: {cmd}") + conversation.run() + +conversation.close() +print("\n" + "=" * 40) +print("Multiple turns complete. Check for accumulated garbage.") +EOF +``` + +--- + +## Test 4: Automated Unit Tests + +```bash +# Run the flush_stdin tests +uv run pytest tests/sdk/logger/test_flush_stdin.py -v + +# Run all SDK logger tests +uv run pytest tests/sdk/logger/ -v + +# Run with coverage +uv run pytest tests/sdk/logger/test_flush_stdin.py -v --cov=openhands.sdk.logger --cov-report=term-missing +``` + +--- + +## Comparison Testing (Before/After) + +To compare behavior with and without the fix: + +```bash +# Test on main branch (before fix) +git stash +git checkout main +uv sync --dev +# Run Test 2 above - expect garbage in prompt + +# Test on fix branch (after fix) +git checkout fix/stdin-escape-code-leak +git stash pop +uv sync --dev +# Run Test 2 above - expect NO garbage +``` + +--- + +## What to Look For + +### ✅ Success indicators: +- No escape code garbage (`^[[23;1R`, `;1R`, `rgb:...`) in terminal after script exits +- `input()` calls work correctly without pre-filled garbage +- Automated tests pass + +### ❌ Failure indicators: +- Garbage characters appearing in shell prompt after script exits +- `input()` returning unexpected content +- Terminal mode corruption (echo off, raw mode stuck) + +--- + +## Files Changed in This PR + +``` +openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py # Calls flush_stdin() +openhands-sdk/openhands/sdk/conversation/visualizer/default.py # Calls flush_stdin() +openhands-sdk/openhands/sdk/logger/__init__.py # Exports flush_stdin +openhands-sdk/openhands/sdk/logger/logger.py # Implements flush_stdin() +tests/sdk/logger/test_flush_stdin.py # Unit tests +``` + +To review the implementation: +```bash +git --no-pager show HEAD -- openhands-sdk/openhands/sdk/logger/logger.py | head -300 +``` diff --git a/.pr/test_real_world.py b/.pr/test_real_world.py new file mode 100644 index 0000000000..e40d8fd2c0 --- /dev/null +++ b/.pr/test_real_world.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +"""Real-world test for terminal query filtering fix. + +Tests that terminal query sequences (DSR, OSC 11, etc.) are filtered from +captured terminal output before display, preventing visible escape code garbage. + +See: https://github.com/OpenHands/software-agent-sdk/issues/2244 +""" + +import os +import sys + +from openhands.sdk import LLM, Agent, Conversation, Tool +from openhands.tools.terminal import TerminalTool + + +print("=" * 60) +print("REAL-WORLD TEST: Terminal Query Filtering Fix") +print("=" * 60) +print(f"stdin.isatty(): {sys.stdin.isatty()}") +print(f"stdout.isatty(): {sys.stdout.isatty()}") +print() + +llm = LLM( + model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), + api_key=os.environ["LLM_API_KEY"], + base_url=os.environ.get("LLM_BASE_URL"), +) + +agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) +conversation = Conversation(agent=agent, workspace="/tmp") + +# Commands with spinners (like gh) send terminal queries that would +# cause visible garbage if not filtered +print(">>> Sending message to agent...") +print(">>> The gh command sends terminal queries - these should be filtered") +print() + +conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3") +conversation.run() +conversation.close() + +print() +print("=" * 60) +print("TEST COMPLETE") +print("=" * 60) +print() +print("SUCCESS CRITERIA:") +print(" 1. NO visible escape codes (^[[...R, rgb:...) in the output above") +print(" 2. NO garbage on the shell prompt after this script exits") +print(" 3. Colors in the gh output should still be visible (if terminal supports)") +print() +print("The fix filters terminal QUERY sequences while preserving formatting.") From e19a0aadc1d77779387f64fdc39288c49a97c4a3 Mon Sep 17 00:00:00 2001 From: allhands-bot Date: Wed, 11 Mar 2026 17:13:33 +0000 Subject: [PATCH 10/10] chore: Remove PR-only artifacts [automated] --- .pr/diagnose_echo.py | 152 ---------------------- .pr/diagnose_leak.py | 105 --------------- .pr/diagnose_source.py | 104 --------------- .pr/manual-testing.md | 281 ----------------------------------------- .pr/test_real_world.py | 53 -------- 5 files changed, 695 deletions(-) delete mode 100644 .pr/diagnose_echo.py delete mode 100644 .pr/diagnose_leak.py delete mode 100644 .pr/diagnose_source.py delete mode 100644 .pr/manual-testing.md delete mode 100644 .pr/test_real_world.py diff --git a/.pr/diagnose_echo.py b/.pr/diagnose_echo.py deleted file mode 100644 index 7ac4b19fce..0000000000 --- a/.pr/diagnose_echo.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -"""Diagnose terminal echo of escape sequence responses. - -The problem: When terminal queries are sent, the terminal responds by: -1. Writing the response to stdin (the "leak" we're flushing) -2. ECHOING the response to stdout (the visible noise) - -This script tests whether we can suppress the echo. -""" - -import os -import select -import sys -import termios -import time - - -def send_query_normal_mode(): - """Send query in normal terminal mode - expect visible echo.""" - print("\n[Normal mode] Sending DSR query...") - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - time.sleep(0.2) - - -def send_query_no_echo_mode(): - """Send query with echo disabled - should suppress visible response.""" - print("\n[No-echo mode] Sending DSR query...") - - # Save current terminal settings - old_settings = termios.tcgetattr(sys.stdin) - - try: - # Disable echo (ECHO flag) - new_settings = list(old_settings) - new_settings[3] &= ~termios.ECHO # Disable echo - termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) - - # Send the query - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - - # Wait for response - time.sleep(0.2) - - # Read and discard the response from stdin - new_settings[3] &= ~termios.ICANON # Also disable canonical mode for reading - new_settings[6][termios.VMIN] = 0 - new_settings[6][termios.VTIME] = 0 - termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) - - if select.select([sys.stdin], [], [], 0)[0]: - data = os.read(sys.stdin.fileno(), 4096) - print(f" (Flushed {len(data)} bytes from stdin: {data!r})") - - finally: - # Restore original settings - termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) - - print(" Query sent with echo disabled") - - -def send_query_raw_mode(): - """Send query in raw mode - full control.""" - print("\n[Raw mode] Sending DSR query...") - - old_settings = termios.tcgetattr(sys.stdin) - - try: - # Enter raw mode (no echo, no canonical, no signals) - new_settings = list(old_settings) - new_settings[3] &= ~(termios.ECHO | termios.ICANON | termios.ISIG) - new_settings[6][termios.VMIN] = 0 - new_settings[6][termios.VTIME] = 0 - termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) - - # Send the query - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - - # Wait and read response - time.sleep(0.2) - - if select.select([sys.stdin], [], [], 0)[0]: - data = os.read(sys.stdin.fileno(), 4096) - print(f" (Flushed {len(data)} bytes from stdin: {data!r})") - - finally: - termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) - - print(" Query sent in raw mode") - - -def main(): - print("=" * 60) - print("TERMINAL ECHO DIAGNOSTIC") - print("=" * 60) - print(f"stdin.isatty(): {sys.stdin.isatty()}") - print(f"stdout.isatty(): {sys.stdout.isatty()}") - - if not sys.stdout.isatty(): - print("\n⚠️ Run directly in terminal, not piped!") - return - - # Test 1: Normal mode (expect visible echo) - send_query_normal_mode() - print(" ^ Did you see escape codes above this line?") - - # Test 2: Echo disabled - send_query_no_echo_mode() - print(" ^ Should be NO visible escape codes") - - # Test 3: Raw mode - send_query_raw_mode() - print(" ^ Should be NO visible escape codes") - - # Test 4: Multiple queries with echo suppression - print("\n[Multiple queries with echo suppressed]") - old_settings = termios.tcgetattr(sys.stdin) - try: - new_settings = list(old_settings) - new_settings[3] &= ~(termios.ECHO | termios.ICANON) - new_settings[6][termios.VMIN] = 0 - new_settings[6][termios.VTIME] = 0 - termios.tcsetattr(sys.stdin, termios.TCSANOW, new_settings) - - for i in range(3): - sys.stdout.write("\x1b[6n") # DSR - sys.stdout.write("\x1b]11;?\x07") # OSC 11 - sys.stdout.flush() - time.sleep(0.3) - - total = 0 - while select.select([sys.stdin], [], [], 0)[0]: - data = os.read(sys.stdin.fileno(), 4096) - if not data: - break - total += len(data) - print(f" Flushed {total} bytes, no visible echo") - - finally: - termios.tcsetattr(sys.stdin, termios.TCSANOW, old_settings) - - print("\n" + "=" * 60) - print("SUMMARY") - print("=" * 60) - print("If echo suppression works, we can modify flush_stdin() to") - print("disable echo BEFORE flushing, preventing visible noise.") - - -if __name__ == "__main__": - main() diff --git a/.pr/diagnose_leak.py b/.pr/diagnose_leak.py deleted file mode 100644 index c873683487..0000000000 --- a/.pr/diagnose_leak.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -"""Diagnose stdin escape code leak. - -Run this DIRECTLY in a terminal (not piped): - uv run python .pr/diagnose_leak.py - -Watch for garbage appearing AFTER the script exits. -""" - -import os -import select -import sys -import time - - -def check_stdin_raw() -> bytes: - """Read pending stdin data without blocking.""" - if not sys.stdin.isatty(): - return b"" - try: - import termios - - old = termios.tcgetattr(sys.stdin) - new = list(old) - new[3] &= ~(termios.ICANON | termios.ECHO) - new[6][termios.VMIN] = 0 - new[6][termios.VTIME] = 0 - termios.tcsetattr(sys.stdin, termios.TCSANOW, new) - data = b"" - if select.select([sys.stdin], [], [], 0)[0]: - data = os.read(sys.stdin.fileno(), 4096) - termios.tcsetattr(sys.stdin, termios.TCSANOW, old) - return data - except Exception as e: - print(f"Error: {e}") - return b"" - - -def main(): - from openhands.sdk.logger import flush_stdin, logger as logger_module - - print("=" * 60) - print("STDIN LEAK DIAGNOSTIC") - print("=" * 60) - print(f"stdin.isatty(): {sys.stdin.isatty()}") - print(f"stdout.isatty(): {sys.stdout.isatty()}") - - if not sys.stdout.isatty(): - print("\n⚠️ WARNING: stdout is not a TTY!") - print(" Terminal queries won't get responses.") - print(" Run directly in terminal, not piped.") - - # Test 1: Send query, wait, check stdin - print("\n[Test 1] Send DSR query, check stdin after 200ms") - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - time.sleep(0.2) - data1 = check_stdin_raw() - print(f" stdin data: {data1!r}") - - # Test 2: Send query, flush with SDK, check what was caught - print("\n[Test 2] Send DSR query, call flush_stdin()") - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - time.sleep(0.2) - flushed = flush_stdin() - preserved = logger_module._preserved_input_buffer - print(f" Bytes flushed: {flushed}") - print(f" Preserved buffer: {preserved!r}") - - # Test 3: OSC 11 query - print("\n[Test 3] Send OSC 11 query (background color)") - sys.stdout.write("\x1b]11;?\x07") - sys.stdout.flush() - time.sleep(0.2) - flushed2 = flush_stdin() - preserved2 = logger_module._preserved_input_buffer - print(f" Bytes flushed: {flushed2}") - print(f" Preserved buffer: {preserved2!r}") - - # Test 4: Multiple rapid queries (simulates Rich/gh behavior) - print("\n[Test 4] Multiple rapid queries then flush") - for _ in range(3): - sys.stdout.write("\x1b[6n") - sys.stdout.write("\x1b]11;?\x07") - sys.stdout.flush() - time.sleep(0.3) - flushed3 = flush_stdin() - preserved3 = logger_module._preserved_input_buffer - print(f" Bytes flushed: {flushed3}") - print(f" Preserved buffer: {preserved3!r}") - - # Final flush before exit - print("\n[Final] Calling flush_stdin() before exit") - final_flushed = flush_stdin() - print(f" Final bytes flushed: {final_flushed}") - - print("\n" + "=" * 60) - print("DONE - Watch for garbage on the next shell prompt!") - print("If you see '^[[...R' or 'rgb:...' after '$', the leak persists.") - print("=" * 60) - - -if __name__ == "__main__": - main() diff --git a/.pr/diagnose_source.py b/.pr/diagnose_source.py deleted file mode 100644 index 38ea458f2f..0000000000 --- a/.pr/diagnose_source.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python3 -"""Diagnose: Are escape codes in PTY output or terminal responses? - -This script runs the terminal tool directly and captures its raw output -WITHOUT displaying it to the terminal. This lets us see if escape codes -are being captured as part of the command output vs generated by terminal -responses. -""" - -import re -import sys - -from openhands.tools.terminal.definition import TerminalAction -from openhands.tools.terminal.terminal.factory import create_terminal_session - - -print("=" * 60) -print("DIAGNOSTIC: Source of Escape Codes") -print("=" * 60) -print(f"stdin.isatty(): {sys.stdin.isatty()}") -print(f"stdout.isatty(): {sys.stdout.isatty()}") -print() - - -print("Creating terminal session...") -session = create_terminal_session( - work_dir="/tmp", - username=None, - no_change_timeout_seconds=30, -) -session.initialize() - -print("Running: gh pr list --repo OpenHands/openhands --limit 3") -print() - -# Execute command and capture output -action = TerminalAction( - command="gh pr list --repo OpenHands/openhands --limit 3", - is_input=False, - timeout=30, -) -result = session.execute(action) - -print("=" * 60) -print("RAW CAPTURED OUTPUT (repr):") -print("=" * 60) -# Show the raw output with escape codes visible -raw_output = result.text if hasattr(result, "text") else str(result) -print(repr(raw_output)) - -print() -print("=" * 60) -print("ESCAPE SEQUENCE ANALYSIS:") -print("=" * 60) - -# Check for common escape sequences -escape_patterns = [ - (b"\x1b[", "CSI (Control Sequence Introducer)"), - (b"\x1b]", "OSC (Operating System Command)"), - (b"\x1bP", "DCS (Device Control String)"), - (b"\x1b\\", "ST (String Terminator)"), -] - -raw_bytes = raw_output.encode("utf-8", errors="replace") -found_any = False - -for pattern, name in escape_patterns: - count = raw_bytes.count(pattern) - if count > 0: - found_any = True - print(f" Found {count}x {name} sequences") - - # Find and show the sequences - if pattern == b"\x1b[": - # CSI sequences end with a letter - matches = re.findall(rb"\x1b\[[0-9;]*[A-Za-z]", raw_bytes) - for m in matches[:5]: # Show up to 5 - print(f" - {m!r}") - elif pattern == b"\x1b]": - # OSC sequences end with BEL or ST - matches = re.findall(rb"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)", raw_bytes) - for m in matches[:5]: - print(f" - {m!r}") - -if not found_any: - print(" No escape sequences found in captured output!") - -print() -print("=" * 60) -print("CONCLUSION:") -print("=" * 60) - -if found_any: - print(" Escape codes ARE in the PTY captured output.") - print(" The gh command is writing these to its stdout.") - print(" FIX: Filter escape sequences from terminal tool output.") -else: - print(" Escape codes are NOT in captured output.") - print(" They must be coming from terminal responses to Rich queries.") - print(" FIX: Echo suppression should work (check implementation).") - -print() -session.close() -print("Session closed.") diff --git a/.pr/manual-testing.md b/.pr/manual-testing.md deleted file mode 100644 index aa151d5906..0000000000 --- a/.pr/manual-testing.md +++ /dev/null @@ -1,281 +0,0 @@ -# Manual Testing Guide for PR #2245 - -**PR**: [Fix terminal escape code leak from stdin](https://github.com/OpenHands/software-agent-sdk/pull/2245) -**Branch**: `fix/stdin-escape-code-leak` -**Issue**: #2244 - ---- - -## ⚠️ INVESTIGATION FINDINGS - -### Root Cause Analysis (2026-03-04) - -The escape codes appearing in the test output are **NOT stdin leaks** - they are being captured as part of the terminal tool's PTY output stream. - -**What's happening:** -1. The `gh` command (or other CLI tools with spinners) sends terminal queries: - - DSR (`\x1b[6n`) - cursor position request - - OSC 11 (`\x1b]11;?`) - background color request - -2. The terminal responds by writing escape sequences to the PTY - -3. The SDK's terminal tool captures ALL PTY output, including these terminal responses - -4. The captured output (including escape codes) is displayed by the visualizer - -**Evidence:** The escape codes appeared IN THE MIDDLE of the conversation output (between "Exit code: 0" and "Message from Agent"), not after the script exited. - -**Implication:** The `flush_stdin()` fix addresses a DIFFERENT problem (stdin corruption for interactive CLI apps). The escape codes in the test output are a PTY output filtering issue, not a stdin leak. - -### Two Separate Issues: - -1. **stdin leak** (what PR #2245 fixes): Terminal responses accumulating in stdin, corrupting subsequent `input()` calls or appearing as garbage AFTER script exits - -2. **PTY output pollution** (what we observed): Terminal responses being captured as part of command output and displayed by the visualizer - -### Recommendation: - -The PTY output issue should be addressed separately, possibly by: -- Filtering escape sequences from captured terminal output -- Setting terminal environment variables to disable queries (e.g., `TERM=dumb`) -- Using `--no-pager` or similar flags for CLI tools - ---- - -## Environment Status ✅ - -- [x] On correct branch: `fix/stdin-escape-code-leak` -- [x] Branch up to date with origin -- [x] Dependencies synced (`uv sync --dev`) -- [x] LLM_API_KEY configured -- [x] LLM_BASE_URL: `https://llm-proxy.app.all-hands.dev/` - -## Quick Commands - -```bash -# Ensure you're on the right branch -git checkout fix/stdin-escape-code-leak -git pull - -# Sync dependencies -uv sync --dev - -# Run automated tests first -uv run pytest tests/sdk/logger/test_flush_stdin.py -v -``` - ---- - -## Test 1: Minimal Reproduction (No API Key Needed) - -This test directly injects terminal queries to verify the fix works: - -```bash -cd /Users/jpshack/code/all-hands/software-agent-sdk -uv run python - <<'EOF' -#!/usr/bin/env python3 -"""Minimal reproduction of ANSI escape code leak.""" - -import os -import select -import sys -import time - -def check_stdin() -> bytes: - """Check for pending data in stdin (non-blocking).""" - if not sys.stdin.isatty(): - return b"" - try: - import termios - old = termios.tcgetattr(sys.stdin) - new = list(old) - new[3] &= ~(termios.ICANON | termios.ECHO) - new[6][termios.VMIN] = 0 - new[6][termios.VTIME] = 0 - termios.tcsetattr(sys.stdin, termios.TCSANOW, new) - data = b"" - if select.select([sys.stdin], [], [], 0)[0]: - data = os.read(sys.stdin.fileno(), 4096) - termios.tcsetattr(sys.stdin, termios.TCSANOW, old) - return data - except Exception: - return b"" - -def main(): - print("ANSI Escape Code Leak Test") - print("=" * 40) - - if not sys.stdout.isatty(): - print("ERROR: Run in a real terminal (not CI/piped)") - sys.exit(1) - - for turn in range(1, 4): - print(f"\n--- Turn {turn} ---") - # Send DSR query (cursor position request) - # Terminal responds with: ESC [ row ; col R - print(" Sending DSR query (\\x1b[6n)...") - sys.stdout.write("\x1b[6n") - sys.stdout.flush() - - time.sleep(0.1) - - leaked = check_stdin() - if leaked: - print(f" LEAK DETECTED: {len(leaked)} bytes: {leaked!r}") - else: - print(" No leak (terminal may not respond)") - - print("\n" + "=" * 40) - print("If you see ';1R' or similar after this script exits, that's the bug.") - -if __name__ == "__main__": - main() -EOF -``` - -**Expected results BEFORE fix**: Leak detected, garbage in prompt after exit -**Expected results AFTER fix**: Still shows leaks (this is the raw terminal behavior - the fix is in the SDK layer) - ---- - -## Test 2: SDK Agent Test with flush_stdin() - -This tests the actual fix by running an agent with terminal tools: - -```bash -cd /Users/jpshack/code/all-hands/software-agent-sdk -uv run python - <<'EOF' -#!/usr/bin/env python3 -"""Real-world reproduction using SDK agent.""" - -import os -import sys - -from openhands.sdk import Agent, Conversation, LLM, Tool -from openhands.tools.terminal import TerminalTool - -llm = LLM( - model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), - api_key=os.environ["LLM_API_KEY"], - base_url=os.environ.get("LLM_BASE_URL"), -) - -agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) -conversation = Conversation(agent=agent, workspace="/tmp") - -# Commands with spinners (like gh) trigger more queries -print("Sending message to agent...") -conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3") -conversation.run() -conversation.close() - -print("\n" + "=" * 40) -print("Check for garbage in your shell prompt after this exits.") -print("With the fix, there should be NO garbage like ';1R' or 'rgb:...'") -EOF -``` - -**Expected results AFTER fix**: No garbage in terminal prompt after script exits - ---- - -## Test 3: Multiple Conversation Turns - -Tests that stdin is properly flushed between conversation turns: - -```bash -cd /Users/jpshack/code/all-hands/software-agent-sdk -uv run python - <<'EOF' -#!/usr/bin/env python3 -"""Multi-turn conversation test.""" - -import os -from openhands.sdk import Agent, Conversation, LLM, Tool -from openhands.tools.terminal import TerminalTool - -llm = LLM( - model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), - api_key=os.environ["LLM_API_KEY"], - base_url=os.environ.get("LLM_BASE_URL"), -) - -agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) -conversation = Conversation(agent=agent, workspace="/tmp") - -# Multiple turns to accumulate potential leaks -for i, cmd in enumerate(["echo 'Turn 1'", "ls -la /tmp | head -5", "echo 'Done'"]): - print(f"\n{'='*40}\nTurn {i+1}: {cmd}\n{'='*40}") - conversation.send_message(f"Run: {cmd}") - conversation.run() - -conversation.close() -print("\n" + "=" * 40) -print("Multiple turns complete. Check for accumulated garbage.") -EOF -``` - ---- - -## Test 4: Automated Unit Tests - -```bash -# Run the flush_stdin tests -uv run pytest tests/sdk/logger/test_flush_stdin.py -v - -# Run all SDK logger tests -uv run pytest tests/sdk/logger/ -v - -# Run with coverage -uv run pytest tests/sdk/logger/test_flush_stdin.py -v --cov=openhands.sdk.logger --cov-report=term-missing -``` - ---- - -## Comparison Testing (Before/After) - -To compare behavior with and without the fix: - -```bash -# Test on main branch (before fix) -git stash -git checkout main -uv sync --dev -# Run Test 2 above - expect garbage in prompt - -# Test on fix branch (after fix) -git checkout fix/stdin-escape-code-leak -git stash pop -uv sync --dev -# Run Test 2 above - expect NO garbage -``` - ---- - -## What to Look For - -### ✅ Success indicators: -- No escape code garbage (`^[[23;1R`, `;1R`, `rgb:...`) in terminal after script exits -- `input()` calls work correctly without pre-filled garbage -- Automated tests pass - -### ❌ Failure indicators: -- Garbage characters appearing in shell prompt after script exits -- `input()` returning unexpected content -- Terminal mode corruption (echo off, raw mode stuck) - ---- - -## Files Changed in This PR - -``` -openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py # Calls flush_stdin() -openhands-sdk/openhands/sdk/conversation/visualizer/default.py # Calls flush_stdin() -openhands-sdk/openhands/sdk/logger/__init__.py # Exports flush_stdin -openhands-sdk/openhands/sdk/logger/logger.py # Implements flush_stdin() -tests/sdk/logger/test_flush_stdin.py # Unit tests -``` - -To review the implementation: -```bash -git --no-pager show HEAD -- openhands-sdk/openhands/sdk/logger/logger.py | head -300 -``` diff --git a/.pr/test_real_world.py b/.pr/test_real_world.py deleted file mode 100644 index e40d8fd2c0..0000000000 --- a/.pr/test_real_world.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -"""Real-world test for terminal query filtering fix. - -Tests that terminal query sequences (DSR, OSC 11, etc.) are filtered from -captured terminal output before display, preventing visible escape code garbage. - -See: https://github.com/OpenHands/software-agent-sdk/issues/2244 -""" - -import os -import sys - -from openhands.sdk import LLM, Agent, Conversation, Tool -from openhands.tools.terminal import TerminalTool - - -print("=" * 60) -print("REAL-WORLD TEST: Terminal Query Filtering Fix") -print("=" * 60) -print(f"stdin.isatty(): {sys.stdin.isatty()}") -print(f"stdout.isatty(): {sys.stdout.isatty()}") -print() - -llm = LLM( - model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"), - api_key=os.environ["LLM_API_KEY"], - base_url=os.environ.get("LLM_BASE_URL"), -) - -agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)]) -conversation = Conversation(agent=agent, workspace="/tmp") - -# Commands with spinners (like gh) send terminal queries that would -# cause visible garbage if not filtered -print(">>> Sending message to agent...") -print(">>> The gh command sends terminal queries - these should be filtered") -print() - -conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3") -conversation.run() -conversation.close() - -print() -print("=" * 60) -print("TEST COMPLETE") -print("=" * 60) -print() -print("SUCCESS CRITERIA:") -print(" 1. NO visible escape codes (^[[...R, rgb:...) in the output above") -print(" 2. NO garbage on the shell prompt after this script exits") -print(" 3. Colors in the gh output should still be visible (if terminal supports)") -print() -print("The fix filters terminal QUERY sequences while preserving formatting.")