From 5930cbf912e1313800cebfdb825f1f1edecd5cfc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 18:43:22 +0000 Subject: [PATCH 1/5] Initial plan From 304e8bb6a3267871b56e794e69109465d83310f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 19:01:13 +0000 Subject: [PATCH 2/5] Create ActionExecutionService to unify tool call execution Co-authored-by: galaxyeye <1701451+galaxyeye@users.noreply.github.com> --- .../agentic/ai/BrowserPerceptiveAgent.kt | 9 +- .../agentic/ai/InternalAgentExecutor.kt | 9 +- .../ai/support/ActionExecutionService.kt | 190 ++++++++++++++++++ .../agentic/ai/support/ToolCallExecutor.kt | 55 +++-- 4 files changed, 225 insertions(+), 38 deletions(-) create mode 100644 pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/BrowserPerceptiveAgent.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/BrowserPerceptiveAgent.kt index 19330dd503..5de70c5c2f 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/BrowserPerceptiveAgent.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/BrowserPerceptiveAgent.kt @@ -3,6 +3,7 @@ package ai.platon.pulsar.agentic.ai import ai.platon.pulsar.agentic.AgenticSession import ai.platon.pulsar.agentic.ai.agent.* import ai.platon.pulsar.agentic.ai.agent.detail.* +import ai.platon.pulsar.agentic.ai.support.ActionExecutionService import ai.platon.pulsar.agentic.ai.support.AgentTool import ai.platon.pulsar.agentic.ai.support.ToolCallExecutor import ai.platon.pulsar.agentic.ai.tta.ActionDescription @@ -100,8 +101,8 @@ class BrowserPerceptiveAgent constructor( private val domService get() = inference.domService private val promptBuilder = PromptBuilder() - // Reuse ToolCallExecutor to avoid recreation overhead (Medium Priority #14) - private val toolCallExecutor = ToolCallExecutor() + // Reuse ActionExecutionService for unified action execution (Medium Priority #14) + private val actionExecutionService = ActionExecutionService() // Helper classes for better code organization private val pageStateTracker = PageStateTracker(session, config) @@ -378,8 +379,8 @@ class BrowserPerceptiveAgent constructor( val driver = activeDriver val callResult = when (toolCall.domain) { - "driver" -> toolCallExecutor.execute(toolCall, driver) - "browser" -> toolCallExecutor.execute(toolCall, driver.browser) + "driver" -> actionExecutionService.execute(toolCall, driver) + "browser" -> actionExecutionService.execute(toolCall, driver.browser) else -> throw IllegalArgumentException("❓ Unsupported domain: ${toolCall.domain} | $toolCall") } diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt index a22207bc24..57d0360668 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt @@ -1,6 +1,7 @@ package ai.platon.pulsar.agentic.ai import ai.platon.pulsar.agentic.AgenticSession +import ai.platon.pulsar.agentic.ai.support.ActionExecutionService import ai.platon.pulsar.agentic.ai.support.ToolCallExecutor import ai.platon.pulsar.agentic.ai.tta.ActionDescription import ai.platon.pulsar.agentic.ai.tta.ToolCallResults @@ -21,7 +22,7 @@ internal class InternalAgentExecutor( session.sessionConfig ) - private val toolCallExecutor = ToolCallExecutor() + private val actionExecutionService = ActionExecutionService() val agent = BrowserPerceptiveAgent(driver, session) @@ -44,9 +45,11 @@ internal class InternalAgentExecutor( } val result = if (toolCall != null) { - toolCallExecutor.execute(toolCall, driver) + actionExecutionService.execute(toolCall, driver) } else { - action.expressions.take(1).map { expr -> toolCallExecutor.execute(expr, driver) }.firstOrNull() + action.expressions.take(1).map { expr -> + actionExecutionService.execute(expr, driver) + }.firstOrNull() } return ToolCallResults( diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt new file mode 100644 index 0000000000..5b732beea0 --- /dev/null +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt @@ -0,0 +1,190 @@ +package ai.platon.pulsar.agentic.ai.support + +import ai.platon.pulsar.agentic.AgenticSession +import ai.platon.pulsar.agentic.ai.agent.detail.ActionValidator +import ai.platon.pulsar.common.brief +import ai.platon.pulsar.common.getLogger +import ai.platon.pulsar.skeleton.ai.ToolCall +import ai.platon.pulsar.skeleton.crawl.fetch.driver.Browser +import ai.platon.pulsar.skeleton.crawl.fetch.driver.WebDriver +import javax.script.ScriptEngineManager + +/** + * Unified service for executing agent actions and tool calls. + * + * This service consolidates all action execution logic into a single, maintainable module. + * It handles: + * - Tool call execution for driver and browser domains + * - Action validation + * - Expression evaluation + * - ToolCall to Expression conversion + * + * ## Architecture + * + * This service acts as the single entry point for all action/tool call operations, + * delegating to specialized executors while managing validation and error handling centrally. + * + * ## Example Usage + * + * ```kotlin + * val service = ActionExecutionService() + * val result = service.execute("driver.click(\"#button\")", driver) + * ``` + * + * @author Vincent Zhang, ivincent.zhang@gmail.com, platon.ai + */ +class ActionExecutionService { + private val logger = getLogger(this) + private val engine = ScriptEngineManager().getEngineByExtension("kts") + private val validator = ActionValidator() + + // Specialized executors for different domains + private val webDriverExecutor = WebDriverToolCallExecutor() + private val browserExecutor = BrowserToolCallExecutor() + + /** + * Execute a tool call on a WebDriver. + * + * @param toolCall The tool call to execute + * @param driver The WebDriver instance + * @return The result of the execution, or null if execution fails + */ + suspend fun execute(toolCall: ToolCall, driver: WebDriver): Any? { + require(toolCall.domain == "driver") { "Tool call domain should be `driver`" } + + // Validate before execution + validator.validateToolCall(toolCall) + + val expression = convertToExpression(toolCall) + ?: throw IllegalArgumentException("Failed to convert to expression: $toolCall") + + return try { + execute(expression, driver) + } catch (e: Exception) { + logger.warn("Error executing TOOL CALL: {} - {}", toolCall, e.brief()) + null + } + } + + /** + * Execute a tool call on a Browser. + * + * @param toolCall The tool call to execute + * @param browser The Browser instance + * @return The result of the execution, or null if execution fails + */ + suspend fun execute(toolCall: ToolCall, browser: Browser): Any? { + require(toolCall.domain == "browser") { "Tool call domain should be `browser`" } + + // Validate before execution + validator.validateToolCall(toolCall) + + val expression = convertToExpression(toolCall) ?: return null + + return try { + execute(expression, browser) + } catch (e: Exception) { + logger.warn("Error executing TOOL CALL: {} - {}", toolCall, e.brief()) + null + } + } + + /** + * Execute an expression string on a WebDriver. + * + * @param expression The expression to execute (e.g., "driver.click('#button')") + * @param driver The WebDriver instance + * @return The result of the execution + */ + suspend fun execute(expression: String, driver: WebDriver): Any? { + return webDriverExecutor.execute(expression, driver) + } + + /** + * Execute an expression string on a Browser. + * + * @param expression The expression to execute + * @param browser The Browser instance + * @return The result of the execution + */ + suspend fun execute(expression: String, browser: Browser): Any? { + return browserExecutor.execute(expression, browser) + } + + /** + * Execute an expression with session binding for browser operations. + * + * @param expression The expression to execute + * @param browser The Browser instance + * @param session The AgenticSession for driver binding + * @return The result of the execution + */ + suspend fun execute(expression: String, browser: Browser, session: AgenticSession): Any? { + return browserExecutor.execute(expression, browser, session) + } + + /** + * Evaluate an expression using Kotlin script engine. + * + * Slower and unsafe - use only when necessary. + * + * @param expression The Kotlin expression to evaluate + * @param variables Variables to bind in the script context + * @return The evaluation result + */ + fun eval(expression: String, variables: Map): Any? { + return try { + variables.forEach { (key, value) -> engine.put(key, value) } + engine.eval(expression) + } catch (e: Exception) { + logger.warn("Error eval expression: {} - {}", expression, e.brief()) + null + } + } + + /** + * Evaluate an expression with a WebDriver context. + */ + fun eval(expression: String, driver: WebDriver): Any? { + return eval(expression, mapOf("driver" to driver)) + } + + /** + * Evaluate an expression with a Browser context. + */ + fun eval(expression: String, browser: Browser): Any? { + return eval(expression, mapOf("browser" to browser)) + } + + /** + * Convert a ToolCall to its expression representation. + * + * This centralizes the conversion logic that was previously in ToolCallExecutor. + * + * @param toolCall The tool call to convert + * @return The expression string, or null if conversion fails + */ + fun convertToExpression(toolCall: ToolCall): String? { + return ToolCallExecutor.toolCallToExpression(toolCall) + } + + /** + * Parse a Kotlin function expression into its components as a ToolCall. + * + * @param input The expression string to parse + * @return Parsed ToolCall object + */ + fun parseExpression(input: String): ToolCall? { + return SimpleKotlinParser().parseFunctionExpression(input) + } + + /** + * Validate a tool call before execution. + * + * @param toolCall The tool call to validate + * @return true if valid, false otherwise + */ + fun validate(toolCall: ToolCall): Boolean { + return validator.validateToolCall(toolCall) + } +} diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt index ea14cf328a..0f80ce26d3 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt @@ -17,6 +17,10 @@ import javax.script.ScriptEngineManager * It parses string commands and executes the corresponding WebDriver methods, enabling * script-based control of browser automation. * + * ## Deprecation Notice: + * This class is maintained for backward compatibility. New code should use [ActionExecutionService] + * which provides a more unified and maintainable approach to action execution. + * * ## Key Features: * - Supports a wide range of WebDriver commands, such as navigation, interaction, and evaluation. * - Provides error handling to ensure robust execution of commands. @@ -25,15 +29,27 @@ import javax.script.ScriptEngineManager * ## Example Usage: * * ```kotlin + * // Deprecated approach * val executor = ToolCallExecutor() * val result = executor.execute("driver.open('https://example.com')", driver) + * + * // Preferred approach + * val service = ActionExecutionService() + * val result = service.execute("driver.open('https://example.com')", driver) * ``` * * @author Vincent Zhang, ivincent.zhang@gmail.com, platon.ai */ +@Deprecated( + message = "Use ActionExecutionService for better maintainability", + replaceWith = ReplaceWith("ActionExecutionService", "ai.platon.pulsar.agentic.ai.support.ActionExecutionService") +) open class ToolCallExecutor { private val logger = getLogger(this) private val engine = ScriptEngineManager().getEngineByExtension("kts") + + // Delegate to the new unified service + private val service = ActionExecutionService() /** * Evaluate [expression]. @@ -45,11 +61,11 @@ open class ToolCallExecutor { * ``` * */ fun eval(expression: String, driver: WebDriver): Any? { - return eval(expression, mapOf("driver" to driver)) + return service.eval(expression, driver) } fun eval(expression: String, browser: Browser): Any? { - return eval(expression, mapOf("browser" to browser)) + return service.eval(expression, browser) } fun eval(expression: String, agent: PerceptiveAgent): Any? { @@ -57,13 +73,7 @@ open class ToolCallExecutor { } fun eval(expression: String, variables: Map): Any? { - return try { - variables.forEach { (key, value) -> engine.put(key, value) } - engine.eval(expression) - } catch (e: Exception) { - logger.warn("Error eval expression: {} - {}", expression, e.brief()) - null - } + return service.eval(expression, variables) } /** @@ -78,40 +88,23 @@ open class ToolCallExecutor { * @return The result of the command execution, or null if the command could not be executed. */ suspend fun execute(expression: String, driver: WebDriver): Any? { - return WebDriverToolCallExecutor().execute(expression, driver) + return service.execute(expression, driver) } suspend fun execute(expression: String, browser: Browser): Any? { - return BrowserToolCallExecutor().execute(expression, browser) + return service.execute(expression, browser) } suspend fun execute(expression: String, browser: Browser, session: AgenticSession): Any? { - return BrowserToolCallExecutor().execute(expression, browser, session) + return service.execute(expression, browser, session) } suspend fun execute(toolCall: ToolCall, driver: WebDriver): Any? { - require(toolCall.domain == "driver") { "Tool call domain should be `driver`" } - val expression = toolCallToExpression(toolCall) ?: - throw IllegalArgumentException("Failed to convert to expression: $toolCall") - - return try { - execute(expression, driver) - } catch (e: Exception) { - logger.warn("Error executing TOOL CALL: {} - {}", toolCall, e.brief()) - null - } + return service.execute(toolCall, driver) } suspend fun execute(toolCall: ToolCall, browser: Browser): Any? { - require(toolCall.domain == "browser") { "Tool call domain should be `browser`" } - val expression = toolCallToExpression(toolCall) ?: return null - - return try { - execute(expression, browser) - } catch (e: Exception) { - logger.warn("Error executing TOOL CALL: {} - {}", toolCall, e.brief()) - null - } + return service.execute(toolCall, browser) } suspend fun execute(toolCall: ToolCall, agent: PerceptiveAgent): Any? { From ecfb590717f59bc5ebd54e2cd380c1b6392f9719 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 19:04:44 +0000 Subject: [PATCH 3/5] Add comprehensive tests and documentation for ActionExecutionService Co-authored-by: galaxyeye <1701451+galaxyeye@users.noreply.github.com> --- docs/agent-action-architecture.md | 195 ++++++++++++++++++ .../support/ActionExecutionServiceTest.kt | 147 +++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 docs/agent-action-architecture.md create mode 100644 pulsar-core/pulsar-agentic/src/test/kotlin/ai/platon/pulsar/agentic/support/ActionExecutionServiceTest.kt diff --git a/docs/agent-action-architecture.md b/docs/agent-action-architecture.md new file mode 100644 index 0000000000..2484888035 --- /dev/null +++ b/docs/agent-action-architecture.md @@ -0,0 +1,195 @@ +# Agent Action and Tool Call Architecture + +## Overview + +This document describes the refactored architecture for agent actions and tool call execution in PulsarRPA. The refactoring consolidates scattered action execution logic into a unified, maintainable structure. + +## Architecture + +### Core Components + +#### 1. ActionExecutionService (New) + +**Location:** `ai.platon.pulsar.agentic.ai.support.ActionExecutionService` + +The central service for all action and tool call execution. This is the **recommended entry point** for new code. + +**Responsibilities:** +- Execute tool calls on WebDriver and Browser instances +- Convert between ToolCall objects and expression strings +- Validate tool calls before execution +- Parse Kotlin function expressions +- Provide script evaluation capabilities + +**Key Methods:** +```kotlin +// Execute operations +suspend fun execute(toolCall: ToolCall, driver: WebDriver): Any? +suspend fun execute(toolCall: ToolCall, browser: Browser): Any? +suspend fun execute(expression: String, driver: WebDriver): Any? +suspend fun execute(expression: String, browser: Browser): Any? + +// Conversion and parsing +fun convertToExpression(toolCall: ToolCall): String? +fun parseExpression(input: String): ToolCall? + +// Validation +fun validate(toolCall: ToolCall): Boolean + +// Script evaluation (use cautiously) +fun eval(expression: String, variables: Map): Any? +``` + +**Example Usage:** +```kotlin +val service = ActionExecutionService() + +// Execute a tool call +val toolCall = ToolCall("driver", "click", mutableMapOf("selector" to "#button")) +val result = service.execute(toolCall, driver) + +// Execute an expression +val result2 = service.execute("driver.scrollToBottom()", driver) + +// Convert and validate +val expression = service.convertToExpression(toolCall) +val isValid = service.validate(toolCall) +``` + +#### 2. ToolCallExecutor (Deprecated) + +**Location:** `ai.platon.pulsar.agentic.ai.support.ToolCallExecutor` + +**Status:** Maintained for backward compatibility, delegates to `ActionExecutionService` + +This class is deprecated and should not be used in new code. Existing code using `ToolCallExecutor` will continue to work but should be migrated to `ActionExecutionService` over time. + +#### 3. Supporting Executors + +##### WebDriverToolCallExecutor +Handles execution of driver-domain tool calls. Contains implementation details for all WebDriver commands. + +**Location:** `ai.platon.pulsar.agentic.ai.support.WebDriverToolCallExecutor` + +##### BrowserToolCallExecutor +Handles execution of browser-domain tool calls (e.g., tab switching). + +**Location:** `ai.platon.pulsar.agentic.ai.support.BrowserToolCallExecutor` + +##### ActionValidator +Validates tool calls before execution to ensure safety and correctness. + +**Location:** `ai.platon.pulsar.agentic.ai.agent.detail.ActionValidator` + +## Tool Call Domains + +PulsarRPA supports two domains for tool calls: + +### 1. Driver Domain +Operations on a specific WebDriver instance (browser tab). + +**Supported Actions:** +- Navigation: `navigateTo`, `goBack`, `goForward` +- Interaction: `click`, `fill`, `type`, `press`, `check`, `uncheck` +- Scrolling: `scrollTo`, `scrollToTop`, `scrollToBottom`, `scrollBy`, `scrollToMiddle` +- Selection: `exists`, `isVisible`, `focus`, `selectFirstTextOrNull`, `selectTextAll` +- Advanced: `clickTextMatches`, `clickMatches`, `evaluate`, `captureScreenshot` +- And more... (see `AgentTool.TOOL_CALL_SPECIFICATION`) + +### 2. Browser Domain +Operations at the browser level (multiple tabs). + +**Supported Actions:** +- `switchTab`: Switch between browser tabs + +## Migration Guide + +### For New Code + +Always use `ActionExecutionService`: + +```kotlin +// Good ✓ +val service = ActionExecutionService() +val result = service.execute(toolCall, driver) + +// Avoid ✗ +val executor = ToolCallExecutor() +val result = executor.execute(toolCall, driver) +``` + +### For Existing Code + +The `ToolCallExecutor` continues to work and delegates to `ActionExecutionService`, so existing code will function without changes. However, consider migrating gradually: + +**Before:** +```kotlin +class MyAgent { + private val toolCallExecutor = ToolCallExecutor() + + suspend fun performAction(toolCall: ToolCall, driver: WebDriver) { + return toolCallExecutor.execute(toolCall, driver) + } +} +``` + +**After:** +```kotlin +class MyAgent { + private val actionService = ActionExecutionService() + + suspend fun performAction(toolCall: ToolCall, driver: WebDriver) { + return actionService.execute(toolCall, driver) + } +} +``` + +## Benefits of the Refactoring + +1. **Single Entry Point**: All action execution goes through one service +2. **Better Maintainability**: Changes to execution logic only need to be made in one place +3. **Improved Testability**: Service can be easily tested in isolation +4. **Clear Separation of Concerns**: Execution, validation, and conversion are clearly separated +5. **Reduced Code Duplication**: Common logic is consolidated +6. **Easier Debugging**: Centralized logging and error handling + +## Testing + +The `ActionExecutionService` has comprehensive test coverage in: +- `ActionExecutionServiceTest`: Tests for the service API +- Existing `ToolCallExecutor*Test` files: Tests for backward compatibility + +Run tests: +```bash +./mvnw -pl pulsar-core/pulsar-agentic test +``` + +## Related Files + +- `pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/` + - `ActionExecutionService.kt` - Main service + - `ToolCallExecutor.kt` - Deprecated wrapper + - `WebDriverToolCallExecutor.kt` - Driver execution implementation + - `BrowserToolCallExecutor.kt` - Browser execution implementation + - `AgentTool.kt` - Tool specifications and metadata + - `SimpleKotlinParser.kt` - Expression parsing + +- `pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/agent/detail/` + - `ActionValidator.kt` - Validation logic + +## Future Enhancements + +Potential improvements to consider: + +1. **Plugin Architecture**: Allow custom action handlers to be registered +2. **Action History**: Track execution history for debugging +3. **Performance Metrics**: Monitor action execution performance +4. **Async Batch Execution**: Execute multiple actions in parallel +5. **Enhanced Error Recovery**: Automatic retry with backoff for transient failures + +## Support + +For questions or issues related to the action execution architecture: +- Check existing tests for usage examples +- Review the inline documentation in source files +- See `AgentTool.TOOL_CALL_SPECIFICATION` for supported actions diff --git a/pulsar-core/pulsar-agentic/src/test/kotlin/ai/platon/pulsar/agentic/support/ActionExecutionServiceTest.kt b/pulsar-core/pulsar-agentic/src/test/kotlin/ai/platon/pulsar/agentic/support/ActionExecutionServiceTest.kt new file mode 100644 index 0000000000..684dfc53af --- /dev/null +++ b/pulsar-core/pulsar-agentic/src/test/kotlin/ai/platon/pulsar/agentic/support/ActionExecutionServiceTest.kt @@ -0,0 +1,147 @@ +package ai.platon.pulsar.agentic.support + +import ai.platon.pulsar.agentic.ai.support.ActionExecutionService +import ai.platon.pulsar.skeleton.ai.ToolCall +import org.junit.jupiter.api.Assertions.* +import org.junit.jupiter.api.Test + +/** + * Tests for ActionExecutionService to verify unified action execution. + */ +class ActionExecutionServiceTest { + + private val service = ActionExecutionService() + + @Test + fun `parseExpression should parse simple driver calls`() { + val result = service.parseExpression("driver.goBack()") + + assertNotNull(result) + assertEquals("driver", result!!.domain) + assertEquals("goBack", result.method) + assertTrue(result.arguments.isEmpty()) + } + + @Test + fun `parseExpression should parse calls with arguments`() { + val result = service.parseExpression("driver.click(\"#button\")") + + assertNotNull(result) + assertEquals("driver", result!!.domain) + assertEquals("click", result.method) + assertEquals("#button", result.arguments["0"]) + } + + @Test + fun `parseExpression should parse browser calls`() { + val result = service.parseExpression("browser.switchTab(\"tab1\")") + + assertNotNull(result) + assertEquals("browser", result!!.domain) + assertEquals("switchTab", result.method) + assertEquals("tab1", result.arguments["0"]) + } + + @Test + fun `parseExpression should handle empty input`() { + val result = service.parseExpression("") + assertNull(result) + } + + @Test + fun `parseExpression should handle invalid expressions`() { + val result = service.parseExpression("invalid expression") + assertNull(result) + } + + @Test + fun `convertToExpression should convert simple tool calls`() { + val toolCall = ToolCall("driver", "goBack", mutableMapOf()) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertEquals("driver.goBack()", expression) + } + + @Test + fun `convertToExpression should convert tool calls with selector`() { + val toolCall = ToolCall("driver", "click", mutableMapOf("selector" to "#button")) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertTrue(expression!!.contains("#button")) + } + + @Test + fun `convertToExpression should convert navigateTo with URL`() { + val toolCall = ToolCall("driver", "navigateTo", mutableMapOf("url" to "https://example.com")) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertTrue(expression!!.contains("https://example.com")) + } + + @Test + fun `convertToExpression should handle browser switchTab`() { + val toolCall = ToolCall("browser", "switchTab", mutableMapOf("tabId" to "tab123")) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertEquals("browser.switchTab(\"tab123\")", expression) + } + + @Test + fun `validate should accept valid tool calls`() { + val toolCall = ToolCall("driver", "click", mutableMapOf("selector" to "#button")) + val isValid = service.validate(toolCall) + + assertTrue(isValid) + } + + @Test + fun `roundtrip conversion should preserve tool call information`() { + val originalToolCall = ToolCall("driver", "scrollToBottom", mutableMapOf()) + + // Convert to expression + val expression = service.convertToExpression(originalToolCall) + assertNotNull(expression) + + // Parse back to ToolCall + val parsedToolCall = service.parseExpression(expression!!) + assertNotNull(parsedToolCall) + + // Verify roundtrip + assertEquals(originalToolCall.domain, parsedToolCall!!.domain) + assertEquals(originalToolCall.method, parsedToolCall.method) + } + + @Test + fun `convertToExpression should handle special characters in selectors`() { + val toolCall = ToolCall("driver", "click", mutableMapOf( + "selector" to "#my-button[data-test=\"value\"]" + )) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + // Should properly escape quotes + assertTrue(expression!!.contains("\\\"")) + } + + @Test + fun `convertToExpression should handle scroll methods with parameters`() { + val toolCall = ToolCall("driver", "scrollToMiddle", mutableMapOf("ratio" to "0.7")) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertTrue(expression!!.contains("0.7")) + } + + @Test + fun `convertToExpression should handle delay with millis`() { + val toolCall = ToolCall("driver", "delay", mutableMapOf("millis" to "2000")) + val expression = service.convertToExpression(toolCall) + + assertNotNull(expression) + assertEquals("driver.delay(2000)", expression) + } +} From 9a9fae6e9db329c24c5385691ddd25942af4b744 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 19:07:19 +0000 Subject: [PATCH 4/5] Update README-AI.md to reference new architecture documentation Co-authored-by: galaxyeye <1701451+galaxyeye@users.noreply.github.com> --- README-AI.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README-AI.md b/README-AI.md index 909f968d22..6c7ec1ce08 100644 --- a/README-AI.md +++ b/README-AI.md @@ -113,6 +113,9 @@ Module guidelines: - Agentic/LLM-facing APIs → `pulsar-core/pulsar-agentic` - AI client/utilities → `pulsar-third/pulsar-llm` +**Architecture Documentation:** +- [Agent Action and Tool Call Architecture](./docs/agent-action-architecture.md) - Unified action execution system + --- ## 6. Kotlin & Java Interop From 5cb94c23ab0106f0cd3e37cde4be5b4487d4fe19 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 19:11:38 +0000 Subject: [PATCH 5/5] Address code review feedback - eliminate circular dependency and improve performance Co-authored-by: galaxyeye <1701451+galaxyeye@users.noreply.github.com> --- .../agentic/ai/InternalAgentExecutor.kt | 2 +- .../ai/support/ActionExecutionService.kt | 141 ++++++++++++++++- .../agentic/ai/support/ToolCallExecutor.kt | 149 ++---------------- 3 files changed, 153 insertions(+), 139 deletions(-) diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt index 57d0360668..fdf1459d35 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/InternalAgentExecutor.kt @@ -47,7 +47,7 @@ internal class InternalAgentExecutor( val result = if (toolCall != null) { actionExecutionService.execute(toolCall, driver) } else { - action.expressions.take(1).map { expr -> + action.expressions.take(1).map { expr -> actionExecutionService.execute(expr, driver) }.firstOrNull() } diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt index 5b732beea0..6aba7c41d2 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ActionExecutionService.kt @@ -37,6 +37,7 @@ class ActionExecutionService { private val logger = getLogger(this) private val engine = ScriptEngineManager().getEngineByExtension("kts") private val validator = ActionValidator() + private val parser = SimpleKotlinParser() // Specialized executors for different domains private val webDriverExecutor = WebDriverToolCallExecutor() @@ -165,7 +166,143 @@ class ActionExecutionService { * @return The expression string, or null if conversion fails */ fun convertToExpression(toolCall: ToolCall): String? { - return ToolCallExecutor.toolCallToExpression(toolCall) + return toolCallToExpression(toolCall) + } + + // Basic string escaper to safely embed values inside Kotlin string literals + private fun String.esc(): String = this + .replace("\\", "\\\\") + .replace("\"", "\\\"") + + private fun toolCallToExpression(tc: ToolCall): String? { + validator.validateToolCall(tc) + + val arguments = tc.arguments + return when (tc.method) { + // Navigation + "open" -> arguments["url"]?.let { "driver.open(\"${it.esc()}\")" } + "navigateTo" -> arguments["url"]?.let { "driver.navigateTo(\"${it.esc()}\")" } + "goBack" -> "driver.goBack()" + "goForward" -> "driver.goForward()" + // Wait + "waitForSelector" -> arguments["selector"]?.let { sel -> + "driver.waitForSelector(\"${sel.esc()}\", ${(arguments["timeoutMillis"] ?: 5000)})" + } + // Status checking (first batch of new tools) + "exists" -> arguments["selector"]?.let { "driver.exists(\"${it.esc()}\")" } + "isVisible" -> arguments["selector"]?.let { "driver.isVisible(\"${it.esc()}\")" } + "focus" -> arguments["selector"]?.let { "driver.focus(\"${it.esc()}\")" } + // Basic interactions + "click" -> arguments["selector"]?.esc()?.let { + val modifier = arguments["modifier"]?.esc() + val count = arguments["count"]?.toIntOrNull() ?: 1 + when { + modifier != null -> "driver.click(\"$it\", \"$modifier\")" + else -> "driver.click(\"$it\", $count)" + } + } + "fill" -> arguments["selector"]?.let { s -> + val text = arguments["text"]?.esc() ?: "" + "driver.fill(\"${s.esc()}\", \"$text\")" + } + + "press" -> arguments["selector"]?.let { s -> + arguments["key"]?.let { k -> "driver.press(\"${s.esc()}\", \"${k.esc()}\")" } + } + + "check" -> arguments["selector"]?.let { "driver.check(\"${it.esc()}\")" } + "uncheck" -> arguments["selector"]?.let { "driver.uncheck(\"${it.esc()}\")" } + // Scrolling + "scrollDown" -> "driver.scrollDown(${arguments["count"] ?: 1})" + "scrollUp" -> "driver.scrollUp(${arguments["count"] ?: 1})" + "scrollBy" -> { + val pixels = (arguments["pixels"] ?: 200.0).toString() + val smooth = (arguments["smooth"] ?: true).toString() + "driver.scrollBy(${pixels}, ${smooth})" + } + "scrollTo" -> arguments["selector"]?.let { "driver.scrollTo(\"${it.esc()}\")" } + "scrollToTop" -> "driver.scrollToTop()" + "scrollToBottom" -> "driver.scrollToBottom()" + "scrollToMiddle" -> "driver.scrollToMiddle(${arguments["ratio"] ?: 0.5})" + "scrollToScreen" -> arguments["screenNumber"]?.let { n -> "driver.scrollToScreen(${n})" } + // Advanced clicks + "clickTextMatches" -> arguments["selector"]?.let { s -> + val pattern = arguments["pattern"]?.esc() ?: return@let null + val count = arguments["count"] ?: 1 + "driver.clickTextMatches(\"${s.esc()}\", \"$pattern\", $count)" + } + + "clickMatches" -> arguments["selector"]?.let { s -> + val attr = arguments["attrName"]?.esc() ?: return@let null + val pattern = arguments["pattern"]?.esc() ?: return@let null + val count = arguments["count"] ?: 1 + "driver.clickMatches(\"${s.esc()}\", \"$attr\", \"$pattern\", $count)" + } + + "clickNthAnchor" -> arguments["n"]?.let { n -> + val root = arguments["rootSelector"] ?: "body" + "driver.clickNthAnchor(${n}, \"${root.esc()}\")" + } + // Enhanced navigation + "waitForNavigation" -> { + val oldUrl = arguments["oldUrl"] ?: "" + val timeout = arguments["timeoutMillis"] ?: 5000L + "driver.waitForNavigation(\"${oldUrl.esc()}\", ${timeout})" + } + // Screenshots + "captureScreenshot" -> { + val sel = arguments["selector"] + if (sel.isNullOrBlank()) "driver.captureScreenshot()" else "driver.captureScreenshot(\"${sel.esc()}\")" + } + // Timing + "delay" -> "driver.delay(${arguments["millis"] ?: 1000})" + // URL and document info + "currentUrl" -> "driver.currentUrl()" + "url" -> "driver.url()" + "documentURI" -> "driver.documentURI()" + "baseURI" -> "driver.baseURI()" + "referrer" -> "driver.referrer()" + "pageSource" -> "driver.pageSource()" + "getCookies" -> "driver.getCookies()" + // Additional status checking + "isHidden" -> arguments["selector"]?.let { "driver.isHidden(\"${it.esc()}\")" } + "visible" -> arguments["selector"]?.let { "driver.visible(\"${it.esc()}\")" } + "isChecked" -> arguments["selector"]?.let { "driver.isChecked(\"${it.esc()}\")" } + "bringToFront" -> "driver.bringToFront()" + // Additional interactions + "type" -> arguments["selector"]?.let { s -> + arguments["text"]?.let { t -> "driver.type(\"${s.esc()}\", \"${t.esc()}\")" } + } + "scrollToViewport" -> arguments["n"]?.let { "driver.scrollToViewport(${it})" } + "mouseWheelDown" -> "driver.mouseWheelDown(${arguments["count"] ?: 1}, ${arguments["deltaX"] ?: 0.0}, ${arguments["deltaY"] ?: 150.0}, ${arguments["delayMillis"] ?: 0})" + "mouseWheelUp" -> "driver.mouseWheelUp(${arguments["count"] ?: 1}, ${arguments["deltaX"] ?: 0.0}, ${arguments["deltaY"] ?: -150.0}, ${arguments["delayMillis"] ?: 0})" + "moveMouseTo" -> arguments["x"]?.let { x -> + arguments["y"]?.let { y -> "driver.moveMouseTo(${x}, ${y})" } + } ?: arguments["selector"]?.let { s -> + "driver.moveMouseTo(\"${s.esc()}\", ${arguments["deltaX"] ?: 0}, ${arguments["deltaY"] ?: 0})" + } + "dragAndDrop" -> arguments["selector"]?.let { s -> + "driver.dragAndDrop(\"${s.esc()}\", ${arguments["deltaX"] ?: 0}, ${arguments["deltaY"] ?: 0})" + } + // HTML and text extraction + "outerHTML" -> arguments["selector"]?.let { "driver.outerHTML(\"${it.esc()}\")" } ?: "driver.outerHTML()" + "textContent" -> "driver.textContent()" + "selectFirstTextOrNull" -> arguments["selector"]?.let { "driver.selectFirstTextOrNull(\"${it.esc()}\")" } + "selectTextAll" -> arguments["selector"]?.let { "driver.selectTextAll(\"${it.esc()}\")" } + "selectFirstAttributeOrNull" -> arguments["selector"]?.let { s -> + arguments["attrName"]?.let { a -> "driver.selectFirstAttributeOrNull(\"${s.esc()}\", \"${a.esc()}\")" } + } + "selectAttributes" -> arguments["selector"]?.let { "driver.selectAttributes(\"${it.esc()}\")" } + "selectAttributeAll" -> arguments["selector"]?.let { s -> + arguments["attrName"]?.let { a -> "driver.selectAttributeAll(\"${s.esc()}\", \"${a.esc()}\", ${arguments["start"] ?: 0}, ${arguments["limit"] ?: 10000})" } + } + "selectImages" -> arguments["selector"]?.let { "driver.selectImages(\"${it.esc()}\", ${arguments["offset"] ?: 1}, ${arguments["limit"] ?: Int.MAX_VALUE})" } + // JavaScript evaluation + "evaluate" -> arguments["expression"]?.let { "driver.evaluate(\"${it.esc()}\")" } + // Browser-level operations + "switchTab" -> arguments["tabId"]?.let { "browser.switchTab(\"${it.esc()}\")" } + else -> null + } } /** @@ -175,7 +312,7 @@ class ActionExecutionService { * @return Parsed ToolCall object */ fun parseExpression(input: String): ToolCall? { - return SimpleKotlinParser().parseFunctionExpression(input) + return parser.parseFunctionExpression(input) } /** diff --git a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt index 0f80ce26d3..1b2825e551 100644 --- a/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt +++ b/pulsar-core/pulsar-agentic/src/main/kotlin/ai/platon/pulsar/agentic/ai/support/ToolCallExecutor.kt @@ -112,149 +112,26 @@ open class ToolCallExecutor { } companion object { + private val conversionService = ActionExecutionService() + /** * Parses a function call from a text string into its components. * Uses a robust state machine to correctly handle: * - Strings with commas and escaped quotes/backslashes * - Nested parentheses inside arguments * - Optional whitespace and trailing commas + * + * @deprecated Use ActionExecutionService.parseExpression instead */ - fun parseKotlinFunctionExpression(input: String) = SimpleKotlinParser().parseFunctionExpression(input) + @Deprecated("Use ActionExecutionService.parseExpression", ReplaceWith("ActionExecutionService().parseExpression(input)")) + fun parseKotlinFunctionExpression(input: String) = conversionService.parseExpression(input) - // Basic string escaper to safely embed values inside Kotlin string literals - private fun String.esc(): String = this - .replace("\\", "\\\\") - .replace("\"", "\\\"") - - fun toolCallToExpression(tc: ToolCall): String? { - ActionValidator().validateToolCall(tc) - - val arguments = tc.arguments - return when (tc.method) { - // Navigation - "open" -> arguments["url"]?.let { "driver.open(\"${it.esc()}\")" } - "navigateTo" -> arguments["url"]?.let { "driver.navigateTo(\"${it.esc()}\")" } - "goBack" -> "driver.goBack()" - "goForward" -> "driver.goForward()" - // Wait - "waitForSelector" -> arguments["selector"]?.let { sel -> - "driver.waitForSelector(\"${sel.esc()}\", ${(arguments["timeoutMillis"] ?: 5000)})" - } - // Status checking (first batch of new tools) - "exists" -> arguments["selector"]?.let { "driver.exists(\"${it.esc()}\")" } - "isVisible" -> arguments["selector"]?.let { "driver.isVisible(\"${it.esc()}\")" } - "focus" -> arguments["selector"]?.let { "driver.focus(\"${it.esc()}\")" } - // Basic interactions - "click" -> arguments["selector"]?.esc()?.let { - val modifier = arguments["modifier"]?.esc() - val count = arguments["count"]?.toIntOrNull() ?: 1 - when { - modifier != null -> "driver.click(\"$it\", \"$modifier\")" - else -> "driver.click(\"$it\", $count)" - } - } - "fill" -> arguments["selector"]?.let { s -> - val text = arguments["text"]?.esc() ?: "" - "driver.fill(\"${s.esc()}\", \"$text\")" - } - - "press" -> arguments["selector"]?.let { s -> - arguments["key"]?.let { k -> "driver.press(\"${s.esc()}\", \"${k.esc()}\")" } - } - - "check" -> arguments["selector"]?.let { "driver.check(\"${it.esc()}\")" } - "uncheck" -> arguments["selector"]?.let { "driver.uncheck(\"${it.esc()}\")" } - // Scrolling - "scrollDown" -> "driver.scrollDown(${arguments["count"] ?: 1})" - "scrollUp" -> "driver.scrollUp(${arguments["count"] ?: 1})" - "scrollBy" -> { - val pixels = (arguments["pixels"] ?: 200.0).toString() - val smooth = (arguments["smooth"] ?: true).toString() - "driver.scrollBy(${pixels}, ${smooth})" - } - "scrollTo" -> arguments["selector"]?.let { "driver.scrollTo(\"${it.esc()}\")" } - "scrollToTop" -> "driver.scrollToTop()" - "scrollToBottom" -> "driver.scrollToBottom()" - "scrollToMiddle" -> "driver.scrollToMiddle(${arguments["ratio"] ?: 0.5})" - "scrollToScreen" -> arguments["screenNumber"]?.let { n -> "driver.scrollToScreen(${n})" } - // Advanced clicks - "clickTextMatches" -> arguments["selector"]?.let { s -> - val pattern = arguments["pattern"]?.esc() ?: return@let null - val count = arguments["count"] ?: 1 - "driver.clickTextMatches(\"${s.esc()}\", \"$pattern\", $count)" - } - - "clickMatches" -> arguments["selector"]?.let { s -> - val attr = arguments["attrName"]?.esc() ?: return@let null - val pattern = arguments["pattern"]?.esc() ?: return@let null - val count = arguments["count"] ?: 1 - "driver.clickMatches(\"${s.esc()}\", \"$attr\", \"$pattern\", $count)" - } - - "clickNthAnchor" -> arguments["n"]?.let { n -> - val root = arguments["rootSelector"] ?: "body" - "driver.clickNthAnchor(${n}, \"${root.esc()}\")" - } - // Enhanced navigation - "waitForNavigation" -> { - val oldUrl = arguments["oldUrl"] ?: "" - val timeout = arguments["timeoutMillis"] ?: 5000L - "driver.waitForNavigation(\"${oldUrl.esc()}\", ${timeout})" - } - // Screenshots - "captureScreenshot" -> { - val sel = arguments["selector"] - if (sel.isNullOrBlank()) "driver.captureScreenshot()" else "driver.captureScreenshot(\"${sel.esc()}\")" - } - // Timing - "delay" -> "driver.delay(${arguments["millis"] ?: 1000})" - // URL and document info - "currentUrl" -> "driver.currentUrl()" - "url" -> "driver.url()" - "documentURI" -> "driver.documentURI()" - "baseURI" -> "driver.baseURI()" - "referrer" -> "driver.referrer()" - "pageSource" -> "driver.pageSource()" - "getCookies" -> "driver.getCookies()" - // Additional status checking - "isHidden" -> arguments["selector"]?.let { "driver.isHidden(\"${it.esc()}\")" } - "visible" -> arguments["selector"]?.let { "driver.visible(\"${it.esc()}\")" } - "isChecked" -> arguments["selector"]?.let { "driver.isChecked(\"${it.esc()}\")" } - "bringToFront" -> "driver.bringToFront()" - // Additional interactions - "type" -> arguments["selector"]?.let { s -> - arguments["text"]?.let { t -> "driver.type(\"${s.esc()}\", \"${t.esc()}\")" } - } - "scrollToViewport" -> arguments["n"]?.let { "driver.scrollToViewport(${it})" } - "mouseWheelDown" -> "driver.mouseWheelDown(${arguments["count"] ?: 1}, ${arguments["deltaX"] ?: 0.0}, ${arguments["deltaY"] ?: 150.0}, ${arguments["delayMillis"] ?: 0})" - "mouseWheelUp" -> "driver.mouseWheelUp(${arguments["count"] ?: 1}, ${arguments["deltaX"] ?: 0.0}, ${arguments["deltaY"] ?: -150.0}, ${arguments["delayMillis"] ?: 0})" - "moveMouseTo" -> arguments["x"]?.let { x -> - arguments["y"]?.let { y -> "driver.moveMouseTo(${x}, ${y})" } - } ?: arguments["selector"]?.let { s -> - "driver.moveMouseTo(\"${s.esc()}\", ${arguments["deltaX"] ?: 0}, ${arguments["deltaY"] ?: 0})" - } - "dragAndDrop" -> arguments["selector"]?.let { s -> - "driver.dragAndDrop(\"${s.esc()}\", ${arguments["deltaX"] ?: 0}, ${arguments["deltaY"] ?: 0})" - } - // HTML and text extraction - "outerHTML" -> arguments["selector"]?.let { "driver.outerHTML(\"${it.esc()}\")" } ?: "driver.outerHTML()" - "textContent" -> "driver.textContent()" - "selectFirstTextOrNull" -> arguments["selector"]?.let { "driver.selectFirstTextOrNull(\"${it.esc()}\")" } - "selectTextAll" -> arguments["selector"]?.let { "driver.selectTextAll(\"${it.esc()}\")" } - "selectFirstAttributeOrNull" -> arguments["selector"]?.let { s -> - arguments["attrName"]?.let { a -> "driver.selectFirstAttributeOrNull(\"${s.esc()}\", \"${a.esc()}\")" } - } - "selectAttributes" -> arguments["selector"]?.let { "driver.selectAttributes(\"${it.esc()}\")" } - "selectAttributeAll" -> arguments["selector"]?.let { s -> - arguments["attrName"]?.let { a -> "driver.selectAttributeAll(\"${s.esc()}\", \"${a.esc()}\", ${arguments["start"] ?: 0}, ${arguments["limit"] ?: 10000})" } - } - "selectImages" -> arguments["selector"]?.let { "driver.selectImages(\"${it.esc()}\", ${arguments["offset"] ?: 1}, ${arguments["limit"] ?: Int.MAX_VALUE})" } - // JavaScript evaluation - "evaluate" -> arguments["expression"]?.let { "driver.evaluate(\"${it.esc()}\")" } - // Browser-level operations - "switchTab" -> arguments["tabId"]?.let { "browser.switchTab(\"${it.esc()}\")" } - else -> null - } - } + /** + * Convert a ToolCall to its expression representation. + * + * @deprecated Use ActionExecutionService.convertToExpression instead + */ + @Deprecated("Use ActionExecutionService.convertToExpression", ReplaceWith("ActionExecutionService().convertToExpression(tc)")) + fun toolCallToExpression(tc: ToolCall): String? = conversionService.convertToExpression(tc) } }