Skip to content

Commit 4b1bbc6

Browse files
committed
Add browser use agent!
1 parent 4e8fc7c commit 4b1bbc6

File tree

2 files changed

+455
-0
lines changed

2 files changed

+455
-0
lines changed
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/**
2+
* Test script for the browser-use agent.
3+
*
4+
* Runs the agent on browser tasks one at a time, writing full event traces
5+
* to files for analysis. Each task produces a trace file in debug/browser-agent-traces/.
6+
*
7+
* Usage:
8+
* bun agents/browser-use/browser-use.test.ts [taskIndex]
9+
*
10+
* If taskIndex is provided, runs only that task (0-based). Otherwise runs all tasks.
11+
*/
12+
13+
import * as fs from 'fs'
14+
import * as path from 'path'
15+
16+
import { CodebuffClient, loadLocalAgents } from '@codebuff/sdk'
17+
18+
import type { AgentDefinition } from '@codebuff/sdk'
19+
20+
// Absolute directory (under the current working directory) that receives one
// JSON trace file per executed task.
const TRACE_DIR = path.resolve('debug', 'browser-agent-traces')
21+
22+
/** A single browser task that can be handed to the agent. */
interface TaskDefinition {
  /** Short identifier; also embedded in the trace file name. */
  name: string
  /** Natural-language instruction sent to the agent as its prompt. */
  prompt: string
  /** Optional URL forwarded to the agent as the `url` run parameter. */
  url?: string
}
27+
28+
/**
 * Tasks available to the script, addressed by their 0-based position in this
 * array when a task index is passed on the command line.
 */
const TASKS: TaskDefinition[] = [
  // 0: search and read an article
  {
    name: 'wikipedia-search',
    prompt:
      'Navigate to Wikipedia, search for "TypeScript programming language", and tell me the first sentence of the article.',
    url: 'https://en.wikipedia.org',
  },
  // 1: read a list from a front page
  {
    name: 'hacker-news-top',
    prompt:
      'Navigate to Hacker News and tell me the titles of the top 3 stories on the front page.',
    url: 'https://news.ycombinator.com',
  },
  // 2: fill out and submit a form
  {
    name: 'example-form',
    prompt:
      'Navigate to https://httpbin.org/forms/post and fill out the form with: customer name "Test User", telephone "555-1234", size "Medium", topping "Bacon", and submit the form. Report what the server response shows.',
    url: 'https://httpbin.org/forms/post',
  },
]
48+
49+
/** One agent event as stored in a trace file. */
interface TraceEvent {
  /** ISO-8601 time at which the event was recorded by this script. */
  timestamp: string
  /** The event's `type` field, copied out for easier filtering. */
  type: string
  /** The full event object as received from the SDK. */
  data: Record<string, unknown>
}
54+
55+
/**
 * Runs one task through the `browser-use` agent and writes a JSON trace of it.
 *
 * Every event delivered to `handleEvent` is stored with a timestamp and echoed
 * to the console in condensed form. Once the run resolves, a trace containing
 * the task, duration, output and all events is written into TRACE_DIR, and a
 * short result summary is printed.
 *
 * @param client - SDK client used to execute the run.
 * @param task - Task to execute; `task.url` is forwarded as the `url` param when set.
 * @param agentDefinitions - Agent definitions passed through to `client.run`.
 * @param taskIndex - Index of the task; used only in the console banner.
 * @returns `success` (true unless the run output has type `'error'`), the path
 *   of the written trace file, and the raw run output.
 */
async function runTask(
  client: CodebuffClient,
  task: TaskDefinition,
  agentDefinitions: AgentDefinition[],
  taskIndex: number,
): Promise<{ success: boolean; traceFile: string; output: unknown }> {
  // Maximum number of characters of a tool result echoed to the console.
  const maxPreviewLength = 200

  const events: TraceEvent[] = []
  const startTime = Date.now()

  console.log(`\n${'='.repeat(60)}`)
  console.log(`Task ${taskIndex}: ${task.name}`)
  console.log(`Prompt: ${task.prompt}`)
  console.log(`${'='.repeat(60)}\n`)

  const runState = await client.run({
    agent: 'browser-use',
    prompt: task.prompt,
    params: task.url ? { url: task.url } : undefined,
    agentDefinitions,
    maxAgentSteps: 30,
    handleEvent: (event) => {
      // Record the complete event first; the console output below is only a digest.
      events.push({
        timestamp: new Date().toISOString(),
        type: event.type,
        data: event as Record<string, unknown>,
      })

      if (event.type === 'text') {
        process.stdout.write(event.text ?? '')
      } else if (event.type === 'tool_call') {
        console.log(`\n[Tool Call] ${event.toolName}`)
      } else if (event.type === 'tool_result') {
        // JSON.stringify yields undefined for values it cannot serialize
        // (e.g. an undefined output), so fall back to a printable label.
        const serialized: string | undefined = JSON.stringify(event.output)
        const text = serialized ?? 'undefined'
        // Only mark the preview with an ellipsis when something was cut off.
        const preview =
          text.length > maxPreviewLength
            ? `${text.slice(0, maxPreviewLength)}...`
            : text
        console.log(`[Tool Result] ${preview}`)
      } else if (event.type === 'error') {
        console.error(`[Error] ${event.message}`)
      } else if (event.type === 'subagent_start') {
        console.log(`[Subagent Start] ${event.agentType}`)
      } else if (event.type === 'subagent_finish') {
        console.log(`[Subagent Finish] ${event.agentType}`)
      }
    },
  })

  const duration = ((Date.now() - startTime) / 1000).toFixed(1)
  const output = runState.output

  const trace = {
    task: {
      name: task.name,
      prompt: task.prompt,
      url: task.url,
    },
    duration: `${duration}s`,
    output,
    eventCount: events.length,
    events,
  }

  // ':' and '.' are replaced so the ISO timestamp is usable in a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
  const traceFile = path.join(TRACE_DIR, `${timestamp}_${task.name}.json`)
  // Create the directory here as well so this function does not depend on the
  // caller having prepared it; a no-op when it already exists.
  fs.mkdirSync(TRACE_DIR, { recursive: true })
  fs.writeFileSync(traceFile, JSON.stringify(trace, null, 2))

  const success = output?.type !== 'error'

  console.log(`\n${'─'.repeat(60)}`)
  console.log(`Result: ${success ? '✅ SUCCESS' : '❌ FAILURE'}`)
  console.log(`Duration: ${duration}s`)
  console.log(`Events: ${events.length}`)
  console.log(`Trace: ${traceFile}`)

  if (output?.type === 'error') {
    console.log(`Error: ${output.message}`)
  } else if (output?.type === 'structuredOutput') {
    const data = output.value as Record<string, unknown> | null
    console.log(`Status: ${data?.overallStatus}`)
    console.log(`Summary: ${data?.summary}`)
    if (data && Array.isArray(data.lessons) && data.lessons.length > 0) {
      console.log(`Lessons:`)
      for (const lesson of data.lessons) {
        console.log(`  - ${lesson}`)
      }
    }
  }
  console.log(`${'─'.repeat(60)}`)

  return { success, traceFile, output }
}
146+
147+
/**
 * Script entry point.
 *
 * Selects the tasks to run (a single task when a 0-based index is given as the
 * first CLI argument, otherwise all of TASKS), loads the local agent
 * definitions from `agents/` under the current working directory, runs the
 * selected tasks sequentially and prints a pass/fail summary.
 *
 * Exits with status 1 when the task index is invalid or the `browser-use`
 * agent cannot be found, and sets a non-zero exit code when any task fails.
 */
async function main() {
  fs.mkdirSync(TRACE_DIR, { recursive: true })

  const taskIndexArg = process.argv[2]
  const allTasks = TASKS.map((task, index) => ({ task, index }))
  let tasksToRun = allTasks

  if (taskIndexArg !== undefined) {
    // Accept plain non-negative integers only; parseInt would silently accept
    // inputs such as "1abc". Anything else maps to NaN, which selects nothing.
    const index = /^\d+$/.test(taskIndexArg) ? Number(taskIndexArg) : NaN
    const selected = allTasks[index]
    if (selected === undefined) {
      console.error(`Invalid task index: ${taskIndexArg}. Available: 0-${TASKS.length - 1}`)
      process.exit(1)
    }
    tasksToRun = [selected]
  }

  const agents = await loadLocalAgents({
    agentsPath: path.join(process.cwd(), 'agents'),
    verbose: true,
  })
  const agentDefinitions = Object.values(agents) as AgentDefinition[]

  const browserAgent = agentDefinitions.find((a) => a.id === 'browser-use')
  if (!browserAgent) {
    console.error('browser-use agent not found in agents/ directory')
    process.exit(1)
  }
  console.log(`Loaded browser-use agent (model: ${browserAgent.model})`)

  const client = new CodebuffClient({
    apiKey: process.env.CODEBUFF_API_KEY,
    cwd: process.cwd(),
  })

  const results: Array<{ name: string; success: boolean; traceFile: string }> = []

  // Tasks are run one at a time, matching the script's documented behavior.
  for (const { task, index } of tasksToRun) {
    const result = await runTask(client, task, agentDefinitions, index)
    results.push({ name: task.name, success: result.success, traceFile: result.traceFile })
  }

  console.log(`\n${'='.repeat(60)}`)
  console.log('SUMMARY')
  console.log(`${'='.repeat(60)}`)
  for (const r of results) {
    // Separate the task name from the trace path so the two do not run together.
    console.log(`  ${r.success ? '✅' : '❌'} ${r.name} → ${r.traceFile}`)
  }
  const passed = results.filter((r) => r.success).length
  console.log(`\n${passed}/${results.length} tasks passed`)

  // Let callers (shells, CI) detect failed tasks without parsing the output.
  if (passed < results.length) {
    process.exitCode = 1
  }
}
192+
193+
// Start the script only when import.meta.main is truthy.
if (import.meta.main) {
  main().catch((fatal) => {
    // Any rejection escaping main() is fatal: report it and exit non-zero.
    console.error('Fatal error:', fatal)
    process.exit(1)
  })
}

0 commit comments

Comments
 (0)