OpenHands · csmith49 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/openhands-sdk/openhands/sdk/conversation/compliance/__init__.py b/openhands-sdk/openhands/sdk/conversation/compliance/__init__.py
@@ -0,0 +1,25 @@
+"""API Compliance monitoring for conversation events.
+
+This module provides an APIComplianceMonitor that detects and rejects violations
+of LLM API requirements in the event stream. Violating events are logged and
+rejected (not added to conversation). Future versions may support reconciliation
+strategies.
+
+The monitor enforces valid tool-call sequences:
+- When tool calls are pending, only matching observations are allowed
+- Messages cannot interleave with pending tool calls
+- Tool results must reference known tool_call_ids
+"""
+
+from openhands.sdk.conversation.compliance.base import (
+    ComplianceState,
+    ComplianceViolation,
+)
+from openhands.sdk.conversation.compliance.monitor import APIComplianceMonitor
+
+
+__all__ = [
+    "APIComplianceMonitor",
+    "ComplianceState",
+    "ComplianceViolation",
+]
diff --git a/openhands-sdk/openhands/sdk/conversation/compliance/base.py b/openhands-sdk/openhands/sdk/conversation/compliance/base.py
@@ -0,0 +1,41 @@
+"""Base classes for API compliance monitoring."""
+
+from dataclasses import dataclass, field
+
+from openhands.sdk.event.types import EventID, ToolCallID
+
+
+@dataclass
+class ComplianceViolation:
+    """Represents an API compliance violation.
+
+    Attributes:
+        property_name: Name of the property that was violated.
+        event_id: ID of the event that caused the violation.
+        description: Human-readable description of the violation.
+        context: Optional additional context (e.g., related tool_call_ids).
+    """
+
+    property_name: str
+    event_id: EventID
+    description: str
+    context: dict[str, object] | None = None
+
+
+@dataclass
+class ComplianceState:
+    """Shared state for tracking API compliance.
+
+    Tracks the tool call lifecycle to detect violations:
+    - pending_tool_call_ids: Actions awaiting results
+    - completed_tool_call_ids: Actions that have received results
+
+    Attributes:
+        pending_tool_call_ids: Tool calls that have been made but not yet
+            received results. Maps tool_call_id to the ActionEvent id.
+        completed_tool_call_ids: Tool calls that have received results.
+            Used to detect duplicate results.
+    """
+
+    pending_tool_call_ids: dict[ToolCallID, EventID] = field(default_factory=dict)
+    completed_tool_call_ids: set[ToolCallID] = field(default_factory=set)
diff --git a/openhands-sdk/openhands/sdk/conversation/compliance/monitor.py b/openhands-sdk/openhands/sdk/conversation/compliance/monitor.py
@@ -0,0 +1,172 @@
+"""API Compliance Monitor that checks events before adding to conversation."""
+
+from openhands.sdk.conversation.compliance.base import (
+    ComplianceState,
+    ComplianceViolation,
+)
+from openhands.sdk.event import (
+    ActionEvent,
+    LLMConvertibleEvent,
+    MessageEvent,
+    ObservationBaseEvent,
+)
+from openhands.sdk.logger import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class APIComplianceMonitor:
+    """Monitors events for API compliance violations.
+
+    Enforces valid tool-call sequences by checking what events are allowed
+    given current state. The key invariant: when tool calls are pending,
+    only matching observations are allowed.
+
+    State machine:
+    - IDLE (no pending calls): Messages and new actions allowed
+    - TOOL_CALLING (pending calls): Only matching observations allowed
+    Currently operates in rejection mode: violating events are logged and rejected
+    (not added to the conversation).
+
+    Attributes:
+        state: Compliance state tracking pending/completed tool calls.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the compliance monitor."""
+        self.state = ComplianceState()
+
+    def _check_tool_call_sequence(
+        self, event: LLMConvertibleEvent
+    ) -> ComplianceViolation | None:
+        """Check if an event violates the tool-call sequence property.
+
+        The rule is simple: if we have pending tool calls, only matching
+        observations are allowed. This covers all 8 API compliance patterns:
+
+        - a01 (unmatched_tool_use): Message while calls pending
+        - a02 (unmatched_tool_result): Result with unknown ID
+        - a03 (interleaved_user_msg): User message while calls pending
+        - a04 (interleaved_asst_msg): Assistant message while calls pending
+        - a05 (duplicate_tool_call_id): Result for already-completed ID
+        - a06 (wrong_tool_call_id): Result with wrong/unknown ID
+        - a07 (parallel_missing_result): Message before all parallel results
+        - a08 (parallel_wrong_order): Result before action (unknown ID)
+
+        Args:
+            event: The event to check.
+
+        Returns:
+            A ComplianceViolation if the event violates the property, None otherwise.
+        """
+        # Actions are always allowed - they start or continue a tool-call batch
+        if isinstance(event, ActionEvent):
+            return None
+
+        # Messages require no pending tool calls
+        if isinstance(event, MessageEvent):
+            if self.state.pending_tool_call_ids:
+                pending_ids = list(self.state.pending_tool_call_ids.keys())
+                return ComplianceViolation(
+                    property_name="interleaved_message",
+                    event_id=event.id,
+                    description=(
+                        f"Message interleaved with {len(pending_ids)} pending "
+                        f"tool call(s)"
+                    ),
+                    context={"pending_tool_call_ids": pending_ids},
+                )
+            return None
+
+        # Observations must match a known tool_call_id
+        if isinstance(event, ObservationBaseEvent):
+            tool_call_id = event.tool_call_id
+
+            # Check for valid match (pending)
+            if tool_call_id in self.state.pending_tool_call_ids:
+                return None  # Valid - completes a pending call
+
+            # Check for duplicate (already completed)
+            if tool_call_id in self.state.completed_tool_call_ids:
+                return ComplianceViolation(
+                    property_name="duplicate_tool_result",
+                    event_id=event.id,
+                    description=(
+                        f"Duplicate tool result for tool_call_id: {tool_call_id}"
+                    ),
+                    context={"tool_call_id": tool_call_id},
+                )
+
+            # Unknown ID - orphan result (covers a02, a06, a08)
+            return ComplianceViolation(
+                property_name="unmatched_tool_result",
+                event_id=event.id,
+                description=(
+                    f"Tool result references unknown tool_call_id: {tool_call_id}"
+                ),
+                context={"tool_call_id": tool_call_id},
+            )
+
+        return None
+
+    def _update_state(self, event: LLMConvertibleEvent) -> None:
+        """Update compliance state after processing an event.
+
+        Tracks the tool-call lifecycle:
+        - ActionEvent: Add to pending
+        - ObservationBaseEvent: Move from pending to completed
+        """
+        if isinstance(event, ActionEvent):
+            self.state.pending_tool_call_ids[event.tool_call_id] = event.id
+        elif isinstance(event, ObservationBaseEvent):
+            # Move from pending to completed (if it was pending)
+            self.state.pending_tool_call_ids.pop(event.tool_call_id, None)
+            self.state.completed_tool_call_ids.add(event.tool_call_id)
+
+    def process_event(self, event: LLMConvertibleEvent) -> list[ComplianceViolation]:
+        """Check an event for violations and update state.
+
+        Fail-closed semantics: if checking crashes, the event is treated as
+        violating (state is not updated). Only events that pass checking
+        without violations have their state updated.
+
+        Args:
+            event: The event to process.
+
+        Returns:
+            List of violations detected (empty if compliant).
+        """
+        violations: list[ComplianceViolation] = []
+        check_failed = False
+
+        try:
+            violation = self._check_tool_call_sequence(event)
+            if violation is not None:
+                violations.append(violation)
+                logger.warning(
+                    "API compliance violation detected: %s - %s (event_id=%s)",
+                    violation.property_name,
+                    violation.description,
+                    violation.event_id,
+                )
+        except Exception as e:
+            logger.exception(
+                "Error checking compliance for event %s: %s",
+                event.id,
+                e,
+            )
+            check_failed = True
+
+        # Only update state if check succeeded and no violations
+        if not check_failed and not violations:
+            try:
+                self._update_state(event)
+            except Exception as e:
+                logger.exception(
+                    "Error updating compliance state for event %s: %s",
+                    event.id,
+                    e,
+                )
+
+        return violations
diff --git a/openhands-sdk/openhands/sdk/conversation/events_list_base.py b/openhands-sdk/openhands/sdk/conversation/events_list_base.py
@@ -2,6 +2,7 @@
 from collections.abc import Sequence
 
 from openhands.sdk.event import Event
+from openhands.sdk.event.types import EventID
 
 
 class EventsListBase(Sequence[Event], ABC):
@@ -15,3 +16,8 @@ class EventsListBase(Sequence[Event], ABC):
     def append(self, event: Event) -> None:
         """Add a new event to the list."""
         ...
+
+    @abstractmethod
+    def get_index(self, event_id: EventID) -> int:
+        """Return the integer index for a given event_id."""
+        ...
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -178,12 +178,16 @@ def __init__(
             cipher=cipher,
         )
 
-        # Default callback: persist every event to state
+        # Default callback: persist every event to state with compliance checking
         def _default_callback(e):
             # This callback runs while holding the conversation state's lock
             # (see BaseConversation.compose_callbacks usage inside `with self._state:`
             # regions), so updating state here is thread-safe.
-            self._state.events.append(e)
+            #
+            # Use add_event() to check API compliance before appending.
+            # Events with violations are logged and rejected (not added to event log).
+            # Violations are logged but events are still processed.
+            self._state.add_event(e)
             # Track user MessageEvent IDs here so hook callbacks (which may
             # synthesize or alter user messages) are captured in one place.
             if isinstance(e, MessageEvent) and e.source == "user":

diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py
@@ -352,6 +352,14 @@ def append(self, event: Event) -> None:
         """Add a new event to the list (for compatibility with EventLog interface)."""
         self.add_event(event)
 
+    def get_index(self, event_id: str) -> int:
+        """Return the integer index for a given event_id."""
+        with self._lock:
+            for idx, event in enumerate(self._cached_events):
+                if event.id == event_id:
+                    return idx
+        raise KeyError(f"Unknown event_id: {event_id}")
+
     def create_default_callback(self) -> ConversationCallbackType:
         """Create a default callback that adds events to this list."""