Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions .pr/test_real_world.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""Real-world test for terminal query filtering fix.

Tests that terminal query sequences (DSR, OSC 11, etc.) are filtered from
captured terminal output before display, preventing visible escape code garbage.

Usage:
# With All-Hands LLM proxy:
LLM_BASE_URL="https://llm-proxy.eval.all-hands.dev" LLM_API_KEY="$LLM_API_KEY" \
uv run python .pr/test_real_world.py

# With direct API:
LLM_API_KEY="your-key" uv run python .pr/test_real_world.py

See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""

import os
import sys

from openhands.sdk import LLM, Agent, Conversation, Tool
from openhands.tools.terminal import TerminalTool


print("=" * 60)
print("REAL-WORLD TEST: Terminal Query Filtering Fix")
print("=" * 60)
print(f"stdin.isatty(): {sys.stdin.isatty()}")
print(f"stdout.isatty(): {sys.stdout.isatty()}")
print()

llm = LLM(
model=os.environ.get("LLM_MODEL", "claude-sonnet-4-20250514"),
api_key=os.environ["LLM_API_KEY"],
base_url=os.environ.get("LLM_BASE_URL"),
)

agent = Agent(llm=llm, tools=[Tool(name=TerminalTool.name)])
conversation = Conversation(agent=agent, workspace="/tmp")

# Commands with spinners (like gh) send terminal queries that would
# cause visible garbage if not filtered
print(">>> Sending message to agent...")
print(">>> The gh command sends terminal queries - these should be filtered")
print()

conversation.send_message("Run: gh pr list --repo OpenHands/openhands --limit 3")
conversation.run()
conversation.close()

print()
print("=" * 60)
print("TEST COMPLETE")
print("=" * 60)
print()
print("SUCCESS CRITERIA:")
print(" 1. NO visible escape codes (^[[...R, rgb:...) in the output above")
print(" 2. NO garbage on the shell prompt after this script exits")
print(" 3. Colors in the gh output should still be visible (if terminal supports)")
print()
print("The fix filters terminal QUERY sequences while preserving formatting.")
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
escape_bash_special_chars,
split_bash_commands,
)
from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries


logger = get_logger(__name__)
Expand Down Expand Up @@ -120,7 +121,12 @@ def _get_command_output(
metadata: CmdOutputMetadata,
continue_prefix: str = "",
) -> str:
"""Get the command output with the previous command output removed."""
"""Get the command output with the previous command output removed.
Also filters terminal query sequences that could cause visible escape
code garbage when the output is displayed.
See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""
# remove the previous command output from the new output if any
if self.prev_output:
command_output = raw_command_output.removeprefix(self.prev_output)
Expand All @@ -129,6 +135,11 @@ def _get_command_output(
command_output = raw_command_output
self.prev_output = raw_command_output # update current command output anyway
command_output = _remove_command_prefix(command_output, command)

# Filter terminal query sequences that would cause the terminal to
# respond when displayed, producing visible garbage
command_output = filter_terminal_queries(command_output)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only sanitizes bytes that flow back through the terminal tool. Issue #2244 also reproduces when the SDK process itself emits terminal queries (the minimal repro in the issue does that directly, and Rich capability detection is another example). Those paths never touch TerminalSession, so this patch doesn’t actually close the full bug it claims to fix. Either narrow the scope to PTY-emitted queries only, or handle the SDK-side leak at the conversation/visualizer boundary too.


return command_output.rstrip()

def _handle_completed_command(
Expand Down
14 changes: 14 additions & 0 deletions openhands-tools/openhands/tools/terminal/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Terminal tool utilities."""

from openhands.tools.terminal.utils.command import (
escape_bash_special_chars,
split_bash_commands,
)
from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries


__all__ = [
"escape_bash_special_chars",
"split_bash_commands",
"filter_terminal_queries",
]
89 changes: 89 additions & 0 deletions openhands-tools/openhands/tools/terminal/utils/escape_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Filter terminal query sequences from captured output.

When CLI tools (like `gh`, `npm`, etc.) run inside a PTY, they may send
terminal query sequences as part of their progress/spinner UI. These queries
get captured as output. When displayed, the terminal processes them and
responds, causing visible escape code garbage.

This module provides filtering to remove these query sequences while
preserving legitimate formatting escape codes (colors, bold, etc.).

See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""

import re


# Terminal query sequences that trigger responses (and cause visible garbage)
# These should be stripped from captured output before display.
#
# Reference: ECMA-48, XTerm Control Sequences
# https://invisible-island.net/xterm/ctlseqs/ctlseqs.html

# DSR (Device Status Report) - cursor position query
# Format: ESC [ 6 n -> Response: ESC [ row ; col R
_DSR_PATTERN = re.compile(rb"\x1b\[6n")

# OSC (Operating System Command) queries
# Format: ESC ] Ps ; ? (BEL | ST)
# The ";?" pattern indicates a QUERY (vs SET which has actual values)
# Examples:
# OSC 10 ; ? - foreground color query
# OSC 11 ; ? - background color query
# OSC 4 ; index ; ? - palette color query
# OSC 12 ; ? - cursor color query
# OSC 17 ; ? - highlight background query
# Terminators: BEL (\x07) or ST (ESC \)
#
# This pattern matches ANY OSC query (ending with ;?) rather than
# specific codes, making it future-proof for other query types.
_OSC_QUERY_PATTERN = re.compile(
rb"\x1b\]" # OSC introducer
rb"\d+" # Parameter number (10, 11, 4, 12, etc.)
rb"(?:;[^;\x07\x1b]*)?" # Optional sub-parameter (e.g., palette index)
rb";\?" # Query marker - the key indicator this is a query
rb"(?:\x07|\x1b\\)" # BEL or ST terminator
)

# DA (Device Attributes) primary query
# Format: ESC [ c or ESC [ 0 c
_DA_PATTERN = re.compile(rb"\x1b\[0?c")

# DA2 (Secondary Device Attributes) query
# Format: ESC [ > c or ESC [ > 0 c
_DA2_PATTERN = re.compile(rb"\x1b\[>0?c")

# DECRQSS (Request Selection or Setting) - various terminal state queries
# Format: ESC P $ q <setting> ST
_DECRQSS_PATTERN = re.compile(
rb"\x1bP\$q" # DCS introducer + DECRQSS
rb"[^\x1b]*" # Setting identifier
rb"\x1b\\" # ST terminator
)


def filter_terminal_queries(output: str) -> str:
"""Filter terminal query sequences from captured terminal output.

Removes escape sequences that would cause the terminal to respond
when the output is displayed, while preserving legitimate formatting
sequences (colors, cursor movement, etc.).

Args:
output: Raw terminal output that may contain query sequences.

Returns:
Filtered output with query sequences removed.
"""
# Convert to bytes for regex matching (escape sequences are byte-level)
output_bytes = output.encode("utf-8", errors="surrogateescape")

# Remove each type of query sequence
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This filter is stateless, which means a query split across observations survives unchanged. For example, if one update ends with \u001b]11; and the next starts with ?\u0007, neither call matches, but the client still receives the full OSC query once those chunks are rendered in sequence. That’s not a theoretical edge case here because long-running commands are surfaced incrementally. The fix needs carry-over state for incomplete escape sequences, or it needs to run before the output is sliced into deltas.

output_bytes = _DSR_PATTERN.sub(b"", output_bytes)
output_bytes = _OSC_QUERY_PATTERN.sub(b"", output_bytes)
output_bytes = _DA_PATTERN.sub(b"", output_bytes)
output_bytes = _DA2_PATTERN.sub(b"", output_bytes)
output_bytes = _DECRQSS_PATTERN.sub(b"", output_bytes)

# Convert back to string
return output_bytes.decode("utf-8", errors="surrogateescape")
144 changes: 144 additions & 0 deletions tests/tools/terminal/test_escape_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Tests for terminal escape sequence filtering.

See: https://github.com/OpenHands/software-agent-sdk/issues/2244
"""

from openhands.tools.terminal.utils.escape_filter import filter_terminal_queries


class TestFilterTerminalQueries:
"""Tests for the filter_terminal_queries function."""

def test_dsr_query_removed(self):
"""DSR (Device Status Report) queries should be removed."""
# \x1b[6n is the cursor position query
output = "some text\x1b[6nmore text"
result = filter_terminal_queries(output)
assert result == "some textmore text"

def test_osc_11_background_query_removed(self):
"""OSC 11 (background color query) should be removed."""
# \x1b]11;?\x07 queries background color
output = "start\x1b]11;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_10_foreground_query_removed(self):
"""OSC 10 (foreground color query) should be removed."""
output = "start\x1b]10;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_4_palette_query_removed(self):
"""OSC 4 (palette color query) should be removed."""
output = "start\x1b]4;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_4_palette_with_index_query_removed(self):
"""OSC 4 with palette index (e.g., color 5) should be removed."""
output = "start\x1b]4;5;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_12_cursor_color_query_removed(self):
"""OSC 12 (cursor color query) should be removed."""
output = "start\x1b]12;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_17_highlight_query_removed(self):
"""OSC 17 (highlight background query) should be removed."""
output = "start\x1b]17;?\x07end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_osc_set_title_preserved(self):
"""OSC 0 (set window title) should NOT be removed - it's a SET, not query."""
output = "start\x1b]0;My Window Title\x07end"
result = filter_terminal_queries(output)
assert result == output # Preserved as-is

def test_osc_hyperlink_preserved(self):
"""OSC 8 (hyperlink) should NOT be removed."""
output = "start\x1b]8;;https://example.com\x07link\x1b]8;;\x07end"
result = filter_terminal_queries(output)
assert result == output # Preserved as-is

def test_osc_with_st_terminator_removed(self):
"""OSC queries with ST terminator should be removed."""
# ST terminator is \x1b\\
output = "start\x1b]11;?\x1b\\end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_da_primary_query_removed(self):
"""DA (Device Attributes) primary queries should be removed."""
# \x1b[c and \x1b[0c
output = "start\x1b[cend"
result = filter_terminal_queries(output)
assert result == "startend"

output2 = "start\x1b[0cend"
result2 = filter_terminal_queries(output2)
assert result2 == "startend"

def test_da2_secondary_query_removed(self):
"""DA2 (Secondary Device Attributes) queries should be removed."""
# \x1b[>c and \x1b[>0c
output = "start\x1b[>cend"
result = filter_terminal_queries(output)
assert result == "startend"

output2 = "start\x1b[>0cend"
result2 = filter_terminal_queries(output2)
assert result2 == "startend"

def test_decrqss_query_removed(self):
"""DECRQSS (Request Selection or Setting) queries should be removed."""
# \x1bP$q...\x1b\\
output = "start\x1bP$qsetting\x1b\\end"
result = filter_terminal_queries(output)
assert result == "startend"

def test_colors_preserved(self):
"""ANSI color codes should NOT be removed."""
# Red text: \x1b[31m
output = "normal \x1b[31mred text\x1b[0m normal"
result = filter_terminal_queries(output)
assert result == output

def test_cursor_movement_preserved(self):
"""Cursor movement codes should NOT be removed."""
# Move cursor: \x1b[H (home), \x1b[5A (up 5)
output = "start\x1b[Hmiddle\x1b[5Aend"
result = filter_terminal_queries(output)
assert result == output

def test_multiple_queries_removed(self):
"""Multiple query sequences should all be removed."""
output = "\x1b[6n\x1b]11;?\x07text\x1b[6n"
result = filter_terminal_queries(output)
assert result == "text"

def test_mixed_queries_and_formatting(self):
"""Queries removed while formatting preserved."""
# Color + query + more color
output = "\x1b[32mgreen\x1b[6nmore\x1b]11;?\x07text\x1b[0m"
result = filter_terminal_queries(output)
assert result == "\x1b[32mgreenmoretext\x1b[0m"

def test_empty_string(self):
"""Empty string should return empty string."""
assert filter_terminal_queries("") == ""

def test_no_escape_sequences(self):
"""Plain text without escape sequences passes through."""
output = "Hello, World!"
assert filter_terminal_queries(output) == output

def test_unicode_preserved(self):
"""Unicode characters should be preserved."""
output = "Hello 🌍 World \x1b[6n with emoji"
result = filter_terminal_queries(output)
assert result == "Hello 🌍 World with emoji"
Loading