diff --git a/.pr/review.md b/.pr/review.md new file mode 100644 index 0000000000..1d8c7c2fcc --- /dev/null +++ b/.pr/review.md @@ -0,0 +1,86 @@ +# PR #2356 Review: execute_tool API endpoint + +## Summary + +This PR adds a `POST /api/conversations/{id}/execute_tool` endpoint that allows +executing tools (like the terminal) directly on a conversation without going +through the agent loop. This is primarily designed for `setup.sh` execution where +environment changes need to persist in the agent's terminal session. + +## CI Fixes Applied + +### 1. Pyright type errors in `event_service.py` (pre-commit) +**Problem**: `self._conversation` is typed `LocalConversation | None`. Pyright +cannot narrow the type inside a nested closure after the None-check. + +**Fix**: Captured `self._conversation` in a local variable `conversation` before +defining the `_execute()` closure. + +### 2. `Observation` ABC instantiation in `remote_conversation.py` (sdk-tests) +**Problem**: `Observation` is an abstract base class (ABC) and cannot be +instantiated directly. `BaseObservation.from_text()` raised a validation error. + +**Fix**: Created a private concrete `_RemoteObservation` subclass inside +`execute_tool()` to hold the response text. + +### 3. Stale test `test_remote_conversation_execute_tool_not_implemented` +**Problem**: The test expected `NotImplementedError` but the PR now implements +the method. + +**Fix**: Replaced with `test_remote_conversation_execute_tool` that mocks the +API response and verifies the observation is correctly parsed. + +### 4. Line-too-long issues (E501) +Fixed in `event_service.py` docstring and `models.py` field description. + +## Functional Test Results + +Started the agent server and tested the endpoint with: + +### ✅ Successful tool execution +``` +POST /api/conversations/{id}/execute_tool +{"tool_name": "terminal", "action": {"command": "echo hello world && pwd"}} + +Response: {"observation": {"content": [{"text": "hello world\n/tmp/test_workspace"}], ...}, "is_error": false} +``` + +### ✅ Environment variable persistence (core use case) +``` +# Set variable +{"tool_name": "terminal", "action": {"command": "export MY_SETUP_VAR=setup_complete"}} +# → success + +# Read variable in subsequent call +{"tool_name": "terminal", "action": {"command": "echo $MY_SETUP_VAR"}} +# → "setup_complete" ← Variable persists across calls! +``` + +### ✅ Error handling: nonexistent tool +``` +{"tool_name": "nonexistent_tool", "action": {"command": "echo test"}} +# → 400: "Tool 'nonexistent_tool' not found. Available tools: ['terminal', 'finish', 'think']" +``` + +### ✅ Error handling: nonexistent conversation +``` +POST /api/conversations/00000000-0000-0000-0000-000000000000/execute_tool +# → 404: "Not Found" +``` + +## Code Review Notes + +### Architecture +The layered approach is clean and consistent with existing patterns: +- **Router** → **ConversationService** → **EventService** → **LocalConversation** +- `_ensure_agent_ready()` handles lazy initialization of tools + +### Minor observation +In `conversation_router.py`, the `except KeyError` followed by `except Exception` +is technically redundant (Exception already catches KeyError), but it serves as +documentation of expected error types and is harmless. + +### Overall Assessment +The PR is well-structured and solves a real problem. The key insight — +executing through the agent's persistent terminal session rather than ephemeral +subprocesses — correctly addresses the `setup.sh` environment persistence issue. diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py index 7e3d3f3acb..a832e6ddf6 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_router.py +++ b/openhands-agent-server/openhands/agent_server/conversation_router.py @@ -14,6 +14,8 @@ ConversationInfo, ConversationPage, ConversationSortOrder, + ExecuteToolRequest, + ExecuteToolResponse, GenerateTitleRequest, GenerateTitleResponse, SendMessageRequest, @@ -326,3 +328,37 @@ async def condense_conversation( if not success: raise HTTPException(status.HTTP_404_NOT_FOUND, detail="Conversation not found") return Success() + + +@conversation_router.post( + "/{conversation_id}/execute_tool", + responses={ + 404: {"description": "Item not found"}, + 400: {"description": "Tool not found or execution error"}, + }, +) +async def execute_tool( + conversation_id: UUID, + request: ExecuteToolRequest, + conversation_service: ConversationService = Depends(get_conversation_service), +) -> ExecuteToolResponse: + """Execute a tool directly on a conversation without going through the agent loop. + + This is useful for pre-run setup operations like running setup scripts + through the agent's terminal tool so environment changes persist in the + agent's session. + """ + try: + result = await conversation_service.execute_tool( + conversation_id, request.tool_name, request.action + ) + except KeyError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + except Exception as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + if result is None: + raise HTTPException(status.HTTP_404_NOT_FOUND) + return ExecuteToolResponse( + observation=result, + is_error=result.get("is_error", False), + ) diff --git a/openhands-agent-server/openhands/agent_server/conversation_service.py b/openhands-agent-server/openhands/agent_server/conversation_service.py index d04710e37b..50ec95eb7b 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_service.py +++ b/openhands-agent-server/openhands/agent_server/conversation_service.py @@ -445,6 +445,20 @@ async def ask_agent(self, conversation_id: UUID, question: str) -> str | None: response = await event_service.ask_agent(question) return response + async def execute_tool( + self, conversation_id: UUID, tool_name: str, action_dict: dict + ) -> dict | None: + """Execute a tool directly on a conversation. + + Returns the observation dict, or None if the conversation was not found. + """ + if self._event_services is None: + raise ValueError("inactive_service") + event_service = self._event_services.get(conversation_id) + if event_service is None: + return None + return await event_service.execute_tool(tool_name, action_dict) + async def condense(self, conversation_id: UUID) -> bool: """Force condensation of the conversation history.""" if self._event_services is None: diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 9b2dd03799..323d4c1622 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -330,6 +330,46 @@ async def _run_with_error_handling(): # blocking on the full conversation execution. loop.create_task(_run_with_error_handling()) + async def execute_tool(self, tool_name: str, action_dict: dict) -> dict: + """Execute a tool directly on the conversation. + + Bypasses the agent loop. Useful for pre-run setup operations + like running .openhands/setup.sh through the agent's terminal + tool so environment changes persist in the agent's session. + + Args: + tool_name: The name of the tool to execute (e.g., 'terminal') + action_dict: The action parameters as a dictionary + + Returns: + The observation as a dictionary + + Raises: + ValueError: If the service is inactive + KeyError: If the tool is not found + """ + if not self._conversation: + raise ValueError("inactive_service") + + conversation = self._conversation + loop = asyncio.get_running_loop() + + def _execute(): + # Get the tool to resolve the action type + conversation._ensure_agent_ready() + tool = conversation.agent.tools_map.get(tool_name) + if tool is None: + available_tools = list(conversation.agent.tools_map.keys()) + raise KeyError( + f"Tool '{tool_name}' not found. Available tools: {available_tools}" + ) + # Validate the action dict against the tool's action type + action = tool.action_type.model_validate(action_dict) + observation = conversation.execute_tool(tool_name, action) + return observation.model_dump(mode="json") + + return await loop.run_in_executor(None, _execute) + async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID: subscriber_id = self._pub_sub.subscribe(subscriber) diff --git a/openhands-agent-server/openhands/agent_server/models.py b/openhands-agent-server/openhands/agent_server/models.py index 02810fccc2..22958fedfc 100644 --- a/openhands-agent-server/openhands/agent_server/models.py +++ b/openhands-agent-server/openhands/agent_server/models.py @@ -295,6 +295,28 @@ class AskAgentResponse(BaseModel): response: str = Field(description="The agent's response to the question") +class ExecuteToolRequest(BaseModel): + """Payload to execute a tool directly on a conversation.""" + + tool_name: str = Field( + description="The name of the tool to execute (e.g., 'terminal')" + ) + action: dict[str, Any] = Field( + description="The action parameters to pass to the tool executor" + ) + + +class ExecuteToolResponse(BaseModel): + """Response from executing a tool.""" + + observation: dict[str, Any] = Field( + description="The observation returned by the tool execution" + ) + is_error: bool = Field( + default=False, description="Whether the tool execution resulted in an error" + ) + + class BashEventBase(DiscriminatedUnionMixin, ABC): """Base class for all bash event types""" diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py index f0f813432e..c0ccbdcf72 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py @@ -1210,24 +1210,50 @@ def condense(self) -> None: def execute_tool(self, tool_name: str, action: "Action") -> "Observation": """Execute a tool directly without going through the agent loop. - Note: This method is not yet supported for RemoteConversation. - Tool execution for remote conversations happens on the server side - during the normal agent loop. + This sends the tool execution request to the agent server, which + executes it through the conversation's tool executor. This is useful + for pre-run setup operations like running setup scripts through the + agent's terminal tool so environment changes persist in the agent's + session. Args: tool_name: The name of the tool to execute action: The action to pass to the tool executor + Returns: + The observation returned by the tool execution + Raises: - NotImplementedError: Always, as this feature is not yet supported - for remote conversations. + RuntimeError: If the tool execution fails on the server """ - raise NotImplementedError( - "execute_tool is not yet supported for RemoteConversation. " - "Tool execution for remote conversations happens on the server side " - "during the normal agent loop. Use LocalConversation for direct " - "tool execution." + from openhands.sdk.tool.schema import Observation + + payload = { + "tool_name": tool_name, + "action": action.model_dump(mode="json"), + } + response = _send_request( + self._client, + "POST", + f"/api/conversations/{self._id}/execute_tool", + json=payload, ) + result = response.json() + observation_data = result.get("observation", {}) + is_error = result.get("is_error", False) + + # Extract text from the observation data + text = observation_data.get("text", "") + if not text: + content = observation_data.get("content", []) + if content and isinstance(content, list): + text = content[0].get("text", "") if content else "" + + # Use a concrete subclass since Observation is abstract + class _RemoteObservation(Observation): + pass + + return _RemoteObservation.from_text(text=text, is_error=is_error) def close(self) -> None: """Close the conversation and clean up resources. diff --git a/openhands-sdk/openhands/sdk/workspace/remote/async_remote_workspace.py b/openhands-sdk/openhands/sdk/workspace/remote/async_remote_workspace.py index 814d6f354e..b3e293a428 100644 --- a/openhands-sdk/openhands/sdk/workspace/remote/async_remote_workspace.py +++ b/openhands-sdk/openhands/sdk/workspace/remote/async_remote_workspace.py @@ -149,6 +149,41 @@ async def git_diff(self, path: str | Path) -> GitDiff: result = await self._execute(generator) return result + async def execute_tool( + self, + conversation_id: str, + tool_name: str, + action: dict[str, Any], + timeout: float = 600.0, + ) -> dict[str, Any]: + """Execute a tool on a conversation through the agent server. + + This calls the agent server's execute_tool endpoint, which runs the tool + through the conversation's tool executor (e.g., the persistent terminal + session). This is useful for pre-run setup operations like running + .openhands/setup.sh where environment changes need to persist. + + Args: + conversation_id: The conversation ID (hex string) + tool_name: The name of the tool to execute (e.g., 'terminal') + action: The action parameters as a dictionary + timeout: Timeout in seconds + + Returns: + The response dict with 'observation' and 'is_error' keys + """ + payload = { + "tool_name": tool_name, + "action": action, + } + response = await self.client.post( + f"/api/conversations/{conversation_id}/execute_tool", + json=payload, + timeout=timeout, + ) + response.raise_for_status() + return response.json() + @property def alive(self) -> bool: """Check if the remote workspace is alive by querying the health endpoint. diff --git a/tests/sdk/conversation/remote/test_remote_conversation.py b/tests/sdk/conversation/remote/test_remote_conversation.py index 6f859ca6e9..1d7944d73b 100644 --- a/tests/sdk/conversation/remote/test_remote_conversation.py +++ b/tests/sdk/conversation/remote/test_remote_conversation.py @@ -1004,31 +1004,45 @@ def test_remote_conversation_host_url_normalization(self, mock_ws_client): @patch( "openhands.sdk.conversation.impl.remote_conversation.WebSocketCallbackClient" ) - def test_remote_conversation_execute_tool_not_implemented(self, mock_ws_client): - """Test that execute_tool raises NotImplementedError for RemoteConversation.""" + def test_remote_conversation_execute_tool(self, mock_ws_client): + """Test that execute_tool calls the server endpoint.""" # Setup mocks mock_client_instance = self.setup_mock_client() - conversation_id = str(uuid.uuid4()) - mock_conv_response = self.create_mock_conversation_response(conversation_id) - mock_events_response = self.create_mock_events_response() - - mock_client_instance.post.return_value = mock_conv_response - mock_client_instance.get.return_value = mock_events_response - mock_ws_instance = Mock() mock_ws_client.return_value = mock_ws_instance # Create conversation conversation = RemoteConversation(agent=self.agent, workspace=self.workspace) - # Create a dummy action (using a simple mock) + # Mock the execute_tool API response + mock_execute_response = Mock() + mock_execute_response.status_code = 200 + mock_execute_response.raise_for_status.return_value = None + mock_execute_response.json.return_value = { + "observation": { + "content": [{"text": "hello world", "kind": "text"}], + "is_error": False, + }, + "is_error": False, + } + + # Override request for execute_tool endpoint + original_side_effect = mock_client_instance.request.side_effect + + def request_side_effect(method, url, **kwargs): + if method == "POST" and "execute_tool" in url: + return mock_execute_response + return original_side_effect(method, url, **kwargs) + + mock_client_instance.request.side_effect = request_side_effect + + # Create a mock action with model_dump from unittest.mock import MagicMock mock_action = MagicMock() + mock_action.model_dump.return_value = {"command": "echo hello"} - # Verify execute_tool raises NotImplementedError - with pytest.raises(NotImplementedError) as exc_info: - conversation.execute_tool("any_tool", mock_action) - - assert "not yet supported for RemoteConversation" in str(exc_info.value) + result = conversation.execute_tool("terminal", mock_action) + assert result.text == "hello world" + assert result.is_error is False