From 71e6be74a1feab56acc36d25be11dff0014a1d1e Mon Sep 17 00:00:00 2001 From: Hugo Dutka Date: Tue, 10 Mar 2026 11:14:32 +0000 Subject: [PATCH] feat: add go example --- examples/agent-go/.gitignore | 2 + examples/agent-go/README.md | 41 ++ examples/agent-go/go.mod | 65 +++ examples/agent-go/go.sum | 150 ++++++ examples/agent-go/main.go | 871 +++++++++++++++++++++++++++++++++++ 5 files changed, 1129 insertions(+) create mode 100644 examples/agent-go/.gitignore create mode 100644 examples/agent-go/README.md create mode 100644 examples/agent-go/go.mod create mode 100644 examples/agent-go/go.sum create mode 100644 examples/agent-go/main.go diff --git a/examples/agent-go/.gitignore b/examples/agent-go/.gitignore new file mode 100644 index 0000000..da7ca35 --- /dev/null +++ b/examples/agent-go/.gitignore @@ -0,0 +1,2 @@ +tmp/ +agent-go diff --git a/examples/agent-go/README.md b/examples/agent-go/README.md new file mode 100644 index 0000000..8cb0fcb --- /dev/null +++ b/examples/agent-go/README.md @@ -0,0 +1,41 @@ +# examples/agent-go + +Minimal computer-use demo using `portabledesktop` and the [Fantasy AI SDK](https://github.com/hugodutka/fantasy) for Go. + +## What it does + +1. Starts a desktop session. +2. Starts a live VNC viewer and opens it in your host browser. +3. Runs an agent loop for your `--prompt`, streaming text to stdout as it arrives. +4. Saves an MP4 recording and opens it in your host browser. + +## Setup + +```bash +cd examples/agent-go +go mod download +``` + +Set `ANTHROPIC_API_KEY` in repo-root `.env.local` or your shell. + +## Run + +```bash +go run . --prompt "Open coder.com and find the Dropbox customer story" +``` + +### Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `--prompt` | *(news.ycombinator.com top story)* | Prompt to send to the agent | +| `--model` | `claude-opus-4-6` | Anthropic model ID | +| `--max-steps` | `100` | Maximum agent loop iterations | + +Override the `portabledesktop` binary path with `PORTABLEDESKTOP_BIN`. + +## Notes + +- The example launches a desktop browser automatically. +- Recordings are saved under `examples/agent-go/tmp/`. +- Idle segments in the recording are auto-sped up for demo readability. diff --git a/examples/agent-go/go.mod b/examples/agent-go/go.mod new file mode 100644 index 0000000..351c147 --- /dev/null +++ b/examples/agent-go/go.mod @@ -0,0 +1,65 @@ +module github.com/coder/portabledesktop/examples/agent-go + +go 1.26.0 + +require charm.land/fantasy v0.0.0-20260310103256-242e3d156af6 + +require ( + cloud.google.com/go/auth v0.18.2 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.41.1 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect + github.com/aws/aws-sdk-go-v2/config v1.32.9 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.19.9 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.10 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect + github.com/aws/smithy-go v1.24.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/charmbracelet/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab // indirect + github.com/charmbracelet/x/exp/slice v0.0.0-20250904123553-b4e2667e5ad5 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-viper/mapstructure/v2 v2.5.0 // indirect + github.com/goccy/go-yaml v1.19.2 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.12 // indirect + github.com/googleapis/gax-go/v2 v2.17.0 // indirect + github.com/kaptinlin/go-i18n v0.2.11 // indirect + github.com/kaptinlin/jsonpointer v0.4.16 // indirect + github.com/kaptinlin/jsonschema v0.7.3 // indirect + github.com/kaptinlin/messageformat-go v0.4.18 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect + go.opentelemetry.io/otel v1.40.0 // indirect + go.opentelemetry.io/otel/metric v1.40.0 // indirect + go.opentelemetry.io/otel/trace v1.40.0 // indirect + golang.org/x/crypto v0.48.0 // indirect + golang.org/x/net v0.50.0 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.41.0 // indirect + golang.org/x/text v0.34.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/api v0.267.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d // indirect + google.golang.org/grpc v1.79.1 // indirect + google.golang.org/protobuf v1.36.11 // indirect +) + +replace charm.land/fantasy => github.com/hugodutka/fantasy v0.0.0-20260310103256-242e3d156af6 diff --git a/examples/agent-go/go.sum b/examples/agent-go/go.sum new file mode 100644 index 0000000..c76b592 --- /dev/null +++ b/examples/agent-go/go.sum @@ -0,0 +1,150 @@ +cloud.google.com/go/auth v0.18.2 h1:+Nbt5Ev0xEqxlNjd6c+yYUeosQ5TtEUaNcN/3FozlaM= +cloud.google.com/go/auth v0.18.2/go.mod h1:xD+oY7gcahcu7G2SG2DsBerfFxgPAJz17zz2joOFF3M= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU= +github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4= +github.com/aws/aws-sdk-go-v2/config v1.32.9 h1:ktda/mtAydeObvJXlHzyGpK1xcsLaP16zfUPDGoW90A= +github.com/aws/aws-sdk-go-v2/config v1.32.9/go.mod h1:U+fCQ+9QKsLW786BCfEjYRj34VVTbPdsLP3CHSYXMOI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.9 h1:sWvTKsyrMlJGEuj/WgrwilpoJ6Xa1+KhIpGdzw7mMU8= +github.com/aws/aws-sdk-go-v2/credentials v1.19.9/go.mod h1:+J44MBhmfVY/lETFiKI+klz0Vym2aCmIjqgClMmW82w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.10 h1:+VTRawC4iVY58pS/lzpo0lnoa/SYNGF4/B/3/U5ro8Y= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.10/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14 h1:0jbJeuEHlwKJ9PfXtpSFc4MF+WIWORdhN1n30ITZGFM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ= +github.com/aws/smithy-go v1.24.1 h1:VbyeNfmYkWoxMVpGUAbQumkODcYmfMRfZ8yQiH30SK0= +github.com/aws/smithy-go v1.24.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/charmbracelet/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab h1:J7XQLgl9sefgTnTGrmX3xqvp5o6MCiBzEjGv5igAlc4= +github.com/charmbracelet/anthropic-sdk-go v0.0.0-20260223140439-63879b0b8dab/go.mod h1:hqlYqR7uPKOKfnNeicUbZp0Ps0GeYFlKYtwh5HGDCx8= +github.com/charmbracelet/x/exp/slice v0.0.0-20250904123553-b4e2667e5ad5 h1:DTSZxdV9qQagD4iGcAt9RgaRBZtJl01bfKgdLzUzUPI= +github.com/charmbracelet/x/exp/slice v0.0.0-20250904123553-b4e2667e5ad5/go.mod h1:vI5nDVMWi6veaYH+0Fmvpbe/+cv/iJfMntdh+N0+Tms= +github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2 h1:aBangftG7EVZoUb69Os8IaYg++6uMOdKK83QtkkvJik= +github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2/go.mod h1:qwXFYgsP6T7XnJtbKlf1HP8AjxZZyzxMmc+Lq5GjlU4= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane/envoy v1.37.0 h1:u3riX6BoYRfF4Dr7dwSOroNfdSbEPe9Yyl09/B6wBrQ= +github.com/envoyproxy/go-control-plane/envoy v1.37.0/go.mod h1:DReE9MMrmecPy+YvQOAOHNYMALuowAnbjjEMkkWOi6A= +github.com/envoyproxy/protoc-gen-validate v1.3.3 h1:MVQghNeW+LZcmXe7SY1V36Z+WFMDjpqGAGacLe2T0ds= +github.com/envoyproxy/protoc-gen-validate v1.3.3/go.mod h1:TsndJ/ngyIdQRhMcVVGDDHINPLWB7C82oDArY51KfB0= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e h1:Lf/gRkoycfOBPa42vU2bbgPurFong6zXeFtPoxholzU= +github.com/go-json-experiment/json v0.0.0-20251027170946-4849db3c2f7e/go.mod h1:uNVvRXArCGbZ508SxYYTC5v1JWoz2voff5pm25jU1Ok= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro= +github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= +github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.12 h1:Fg+zsqzYEs1ZnvmcztTYxhgCBsx3eEhEwQ1W/lHq/sQ= +github.com/googleapis/enterprise-certificate-proxy v0.3.12/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= +github.com/googleapis/gax-go/v2 v2.17.0 h1:RksgfBpxqff0EZkDWYuz9q/uWsTVz+kf43LsZ1J6SMc= +github.com/googleapis/gax-go/v2 v2.17.0/go.mod h1:mzaqghpQp4JDh3HvADwrat+6M3MOIDp5YKHhb9PAgDY= +github.com/hugodutka/fantasy v0.0.0-20260310103256-242e3d156af6 h1:bFV3LXw6nN97y1aSpQ4rBygqnFTFt1afIevClVUZ7uQ= +github.com/hugodutka/fantasy v0.0.0-20260310103256-242e3d156af6/go.mod h1:KIeNQUpJTswwpY0P6HJsr3LBFgfTDb8FDpOdVQMsKqY= +github.com/kaptinlin/go-i18n v0.2.11 h1:OayNt8mWt8nDaqAOp09/C1VG9Y5u8LpQnnxbyGARDV4= +github.com/kaptinlin/go-i18n v0.2.11/go.mod h1:pVcu9qsW5pOIOoZFJXesRYmLos1vMQrby70JPAoWmJU= +github.com/kaptinlin/jsonpointer v0.4.16 h1:Ux4w4FY+uLv+K+TxaCJtM/TpPv+1+eS6gH4Z9/uhOuA= +github.com/kaptinlin/jsonpointer v0.4.16/go.mod h1:SsfsjqnHG5zuKo1DTBzk1VknaHlL4osHw+X9kZKukpU= +github.com/kaptinlin/jsonschema v0.7.3 h1:kyIydij76ORiSxmfy0xFYy0cOx8MwG6pyyaSoQshsK4= +github.com/kaptinlin/jsonschema v0.7.3/go.mod h1:Ys6zr+W6/1330FzZEouFrAYImK+AmYt5HQVTHQQXQo8= +github.com/kaptinlin/messageformat-go v0.4.18 h1:RBlHVWgZyoxTcUgGWBsl2AcyScq/urqbLZvzgryTmSI= +github.com/kaptinlin/messageformat-go v0.4.18/go.mod h1:ntI3154RnqJgr7GaC+vZBnIExl2V3sv9selvRNNEM24= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= +go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= +go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= +go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= +golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= +golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/api v0.267.0 h1:w+vfWPMPYeRs8qH1aYYsFX68jMls5acWl/jocfLomwE= +google.golang.org/api v0.267.0/go.mod h1:Jzc0+ZfLnyvXma3UtaTl023TdhZu6OMBP9tJ+0EmFD0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d h1:t/LOSXPJ9R0B6fnZNyALBRfZBH0Uy0gT+uR+SJ6syqQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.79.1 h1:zGhSi45ODB9/p3VAawt9a+O/MULLl9dpizzNNpq7flY= +google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/examples/agent-go/main.go b/examples/agent-go/main.go new file mode 100644 index 0000000..2322d5f --- /dev/null +++ b/examples/agent-go/main.go @@ -0,0 +1,871 @@ +// Portable Desktop AI Agent Example (Go / Fantasy) +// +// Drives a virtual desktop via the `portabledesktop` CLI binary and +// lets Claude interact with it through Anthropic's computer-use tool +// protocol, using the Fantasy AI SDK for Go. +// +// Usage: +// +// go run . --prompt "Open coder.com and confirm the homepage title." +// PORTABLEDESKTOP_BIN=/path/to/portabledesktop go run . --prompt "Do something." +package main + +import ( + "bufio" + "context" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "math" + "os" + "os/exec" + "os/signal" + "path/filepath" + "runtime" + "strconv" + "strings" + "syscall" + "time" + + "charm.land/fantasy" + "charm.land/fantasy/providers/anthropic" +) + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const ( + defaultPrompt = "Navigate to news.ycombinator.com and tell me what the top story is." + defaultWidth = 1280 + defaultHeight = 800 + defaultViewerPort = 6080 + defaultModel = "claude-opus-4-6" + defaultMaxSteps = 100 + defaultScreenshotToMS = 20000 + + // Anthropic recommended screenshot limits. + maxScreenshotLongEdge = 1568 + maxScreenshotPixels = 1_150_000 +) + +// --------------------------------------------------------------------------- +// CLI flags +// --------------------------------------------------------------------------- + +var ( + flagPrompt = flag.String("prompt", defaultPrompt, "Prompt to send to the agent") + flagModel = flag.String("model", defaultModel, "Anthropic model ID") + flagMaxSteps = flag.Int("max-steps", defaultMaxSteps, "Maximum agent steps") +) + +func portabledesktopBin() string { + if v := os.Getenv("PORTABLEDESKTOP_BIN"); v != "" { + return v + } + return "portabledesktop" +} + +// --------------------------------------------------------------------------- +// .env.local loader +// --------------------------------------------------------------------------- + +func loadEnvLocal() { + candidates := []string{ + filepath.Join("..", "..", ".env.local"), + filepath.Join("..", "..", "..", ".env.local"), + } + for _, p := range candidates { + f, err := os.Open(p) + if err != nil { + continue + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + line = strings.TrimPrefix(line, "export ") + idx := strings.Index(line, "=") + if idx == -1 { + continue + } + key := strings.TrimSpace(line[:idx]) + val := strings.TrimSpace(line[idx+1:]) + // Strip surrounding quotes. + if len(val) >= 2 && + ((val[0] == '"' && val[len(val)-1] == '"') || + (val[0] == '\'' && val[len(val)-1] == '\'')) { + val = val[1 : len(val)-1] + } + if os.Getenv(key) == "" { + os.Setenv(key, val) + } + } + break + } +} + +// --------------------------------------------------------------------------- +// Desktop session — wraps the portabledesktop CLI lifecycle +// --------------------------------------------------------------------------- + +type desktopInfo struct { + RuntimeDir string `json:"runtimeDir"` + Display int `json:"display"` + VNCPort int `json:"vncPort"` + Geometry string `json:"geometry"` + Depth int `json:"depth"` + DPI int `json:"dpi"` + DesktopSizeMode string `json:"desktopSizeMode"` + SessionDir string `json:"sessionDir"` + CleanupSessionDirOnStop bool `json:"cleanupSessionDirOnStop"` + Detached bool `json:"detached"` + StateFile string `json:"stateFile"` + StartedAt string `json:"startedAt"` +} + +type desktopSession struct { + info *desktopInfo + cmd *exec.Cmd +} + +func startDesktop(geometry, background string) (*desktopSession, error) { + bin := portabledesktopBin() + args := []string{"up", "--json", "--foreground"} + if geometry != "" { + args = append(args, "--geometry", geometry) + } + if background != "" { + args = append(args, "--background", background) + } + + cmd := exec.Command(bin, args...) + cmd.Stderr = os.Stderr + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdout pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start portabledesktop: %w", err) + } + + scanner := bufio.NewScanner(stdout) + if !scanner.Scan() { + _ = cmd.Process.Kill() + return nil, fmt.Errorf("no output from portabledesktop up") + } + + var info desktopInfo + if err := json.Unmarshal(scanner.Bytes(), &info); err != nil { + _ = cmd.Process.Kill() + return nil, fmt.Errorf("parse desktop info: %w", err) + } + + return &desktopSession{info: &info, cmd: cmd}, nil +} + +func (s *desktopSession) stop() { + if s.cmd != nil && s.cmd.Process != nil { + _ = s.cmd.Process.Signal(syscall.SIGTERM) + _ = s.cmd.Wait() + } +} + +// --------------------------------------------------------------------------- +// CLI exec helpers +// --------------------------------------------------------------------------- + +func pdExec(args ...string) (string, error) { + cmd := exec.Command(portabledesktopBin(), args...) + out, err := cmd.Output() + if err != nil { + if ee, ok := err.(*exec.ExitError); ok { + return "", fmt.Errorf("%s: %s", err, string(ee.Stderr)) + } + return "", err + } + return string(out), nil +} + +func pdExecVoid(args ...string) error { + _, err := pdExec(args...) + return err +} + +// --------------------------------------------------------------------------- +// Recording +// --------------------------------------------------------------------------- + +type recordingHandle struct { + cmd *exec.Cmd +} + +func startRecording(file string) *recordingHandle { + cmd := exec.Command(portabledesktopBin(), + "record", + "--idle-speedup", "20", + "--idle-min-duration", "0.35", + "--idle-noise-tolerance", "-38dB", + file, + ) + cmd.Stdout = nil + cmd.Stderr = nil + _ = cmd.Start() + return &recordingHandle{cmd: cmd} +} + +func (r *recordingHandle) stop() { + if r.cmd != nil && r.cmd.Process != nil { + _ = r.cmd.Process.Signal(syscall.SIGINT) + _ = r.cmd.Wait() + } +} + +// --------------------------------------------------------------------------- +// Viewer +// --------------------------------------------------------------------------- + +func startViewer(port int) *exec.Cmd { + cmd := exec.Command(portabledesktopBin(), + "viewer", + "--port", strconv.Itoa(port), + "--host", "127.0.0.1", + "--no-open", + ) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + _ = cmd.Start() + return cmd +} + +// --------------------------------------------------------------------------- +// Host browser opener +// --------------------------------------------------------------------------- + +func openHostBrowser(url string) { + var commands [][]string + if runtime.GOOS == "darwin" { + commands = [][]string{{"open", url}} + } else { + commands = [][]string{ + {"xdg-open", url}, + {"sensible-browser", url}, + } + } + for _, c := range commands { + cmd := exec.Command(c[0], c[1:]...) + if cmd.Start() == nil { + _ = cmd.Process.Release() + return + } + } + fmt.Fprintf(os.Stdout, " Open manually: %s\n", url) +} + +// --------------------------------------------------------------------------- +// Desktop browser launcher +// --------------------------------------------------------------------------- + +func resolveDesktopBrowser() string { + candidates := []string{ + "google-chrome-stable", + "google-chrome", + "chromium-browser", + "chromium", + "firefox", + } + for _, name := range candidates { + if p, err := exec.LookPath(name); err == nil { + return p + } + } + return "" +} + +func launchDesktopBrowser(url string) { + browser := resolveDesktopBrowser() + if browser == "" { + fmt.Fprintln(os.Stderr, "warning: no browser found inside the desktop") + return + } + + args := []string{browser, "--no-first-run", "--disable-session-crashed-bubble"} + base := filepath.Base(browser) + if strings.Contains(base, "chrom") { + args = append(args, + "--disable-infobars", + "--no-default-browser-check", + fmt.Sprintf("--window-size=%d,%d", defaultWidth, defaultHeight), + url, + ) + } else { + args = append(args, url) + } + + allArgs := append([]string{"open", "--"}, args...) + _ = pdExecVoid(allArgs...) +} + +// --------------------------------------------------------------------------- +// Screenshot sizing +// --------------------------------------------------------------------------- + +func computeScaledSize(w, h int) (int, int) { + longEdge := float64(max(w, h)) + totalPx := float64(w * h) + + longEdgeScale := float64(maxScreenshotLongEdge) / longEdge + totalScale := math.Sqrt(float64(maxScreenshotPixels) / totalPx) + scale := math.Min(1, math.Min(longEdgeScale, totalScale)) + + if scale >= 1 { + return w, h + } + return max(1, int(math.Floor(float64(w)*scale))), + max(1, int(math.Floor(float64(h)*scale))) +} + +// --------------------------------------------------------------------------- +// Computer action types (matching Anthropic computer_20251124) +// --------------------------------------------------------------------------- + +type computerAction struct { + Action string `json:"action"` + Coordinate *[2]int `json:"coordinate,omitempty"` + StartCoordinate *[2]int `json:"start_coordinate,omitempty"` + Text string `json:"text,omitempty"` + ScrollDirection string `json:"scroll_direction,omitempty"` + ScrollAmount *int `json:"scroll_amount,omitempty"` + Duration *float64 `json:"duration,omitempty"` + Region *[4]int `json:"region,omitempty"` +} + +// --------------------------------------------------------------------------- +// Computer tool execution +// --------------------------------------------------------------------------- + +func clampCoord(x, y int) (int, int) { + return max(0, min(defaultWidth-1, x)), max(0, min(defaultHeight-1, y)) +} + +// executeComputerAction runs a single computer action and returns a tool +// result that can be fed back into the conversation. +func executeComputerAction(input computerAction) ([]fantasy.ToolResultOutputContent, error) { + switch input.Action { + case "key": + if input.Text == "" { + return textResult("text is required for key action"), nil + } + if err := pdExecVoid("keyboard", "key", input.Text); err != nil { + return errorResult(err.Error()), nil + } + return textResult(fmt.Sprintf("pressed key combo: %s", input.Text)), nil + + case "hold_key": + if input.Text == "" { + return errorResult("text is required for hold_key action"), nil + } + keys := strings.Split(input.Text, "+") + var pressed []string + for _, k := range keys { + k = strings.TrimSpace(k) + if k == "" { + continue + } + if err := pdExecVoid("keyboard", "down", k); err != nil { + for i := len(pressed) - 1; i >= 0; i-- { + _ = pdExecVoid("keyboard", "up", pressed[i]) + } + return errorResult(err.Error()), nil + } + pressed = append(pressed, k) + } + dur := 250 * time.Millisecond + if input.Duration != nil { + dur = time.Duration(*input.Duration * float64(time.Second)) + if dur < 10*time.Millisecond { + dur = 10 * time.Millisecond + } + } + time.Sleep(dur) + for i := len(pressed) - 1; i >= 0; i-- { + _ = pdExecVoid("keyboard", "up", pressed[i]) + } + return textResult(fmt.Sprintf("held keys for %dms: %s", dur.Milliseconds(), input.Text)), nil + + case "type": + if input.Text == "" { + return errorResult("text is required for type action"), nil + } + if err := pdExecVoid("keyboard", "type", input.Text); err != nil { + return errorResult(err.Error()), nil + } + return textResult(fmt.Sprintf("typed %d characters", len(input.Text))), nil + + case "cursor_position": + out, err := pdExec("cursor", "--json") + if err != nil { + return errorResult(err.Error()), nil + } + var pos struct { + X int `json:"x"` + Y int `json:"y"` + } + if err := json.Unmarshal([]byte(strings.TrimSpace(out)), &pos); err != nil { + return errorResult(err.Error()), nil + } + return textResult(fmt.Sprintf("cursor at %d,%d", pos.X, pos.Y)), nil + + case "mouse_move": + if input.Coordinate == nil { + return errorResult("coordinate is required for mouse_move"), nil + } + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + if err := pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)); err != nil { + return errorResult(err.Error()), nil + } + return textResult(fmt.Sprintf("moved mouse to %d,%d", x, y)), nil + + case "left_click": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + if err := pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)); err != nil { + return errorResult(err.Error()), nil + } + } + if err := pdExecVoid("mouse", "click", "left"); err != nil { + return errorResult(err.Error()), nil + } + return textResult("left click"), nil + + case "left_click_drag": + if input.StartCoordinate == nil { + return errorResult("start_coordinate is required for left_click_drag"), nil + } + if input.Coordinate == nil { + return errorResult("coordinate is required for left_click_drag"), nil + } + sx, sy := clampCoord(input.StartCoordinate[0], input.StartCoordinate[1]) + ex, ey := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(sx), strconv.Itoa(sy)) + _ = pdExecVoid("mouse", "down", "left") + _ = pdExecVoid("mouse", "move", strconv.Itoa(ex), strconv.Itoa(ey)) + _ = pdExecVoid("mouse", "up", "left") + return textResult(fmt.Sprintf("dragged from %d,%d to %d,%d", sx, sy, ex, ey)), nil + + case "left_mouse_down": + if err := pdExecVoid("mouse", "down", "left"); err != nil { + return errorResult(err.Error()), nil + } + return textResult("left mouse down"), nil + + case "left_mouse_up": + if err := pdExecVoid("mouse", "up", "left"); err != nil { + return errorResult(err.Error()), nil + } + return textResult("left mouse up"), nil + + case "right_click": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)) + } + if err := pdExecVoid("mouse", "click", "right"); err != nil { + return errorResult(err.Error()), nil + } + return textResult("right click"), nil + + case "middle_click": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)) + } + if err := pdExecVoid("mouse", "click", "middle"); err != nil { + return errorResult(err.Error()), nil + } + return textResult("middle click"), nil + + case "double_click": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)) + } + _ = pdExecVoid("mouse", "click", "left") + _ = pdExecVoid("mouse", "click", "left") + return textResult("double click"), nil + + case "triple_click": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)) + } + _ = pdExecVoid("mouse", "click", "left") + _ = pdExecVoid("mouse", "click", "left") + _ = pdExecVoid("mouse", "click", "left") + return textResult("triple click"), nil + + case "scroll": + if input.Coordinate != nil { + x, y := clampCoord(input.Coordinate[0], input.Coordinate[1]) + _ = pdExecVoid("mouse", "move", strconv.Itoa(x), strconv.Itoa(y)) + } + amount := 3 + if input.ScrollAmount != nil { + amount = max(1, *input.ScrollAmount) + } + dir := input.ScrollDirection + if dir == "" { + dir = "down" + } + var dx, dy int + switch dir { + case "up": + dy = -amount + case "down": + dy = amount + case "left": + dx = -amount + case "right": + dx = amount + } + if err := pdExecVoid("mouse", "scroll", strconv.Itoa(dx), strconv.Itoa(dy)); err != nil { + return errorResult(err.Error()), nil + } + return textResult(fmt.Sprintf("scrolled %s by %d", dir, amount)), nil + + case "wait": + dur := 1.0 + if input.Duration != nil { + dur = *input.Duration + } + ms := max(10, int(math.Round(dur*1000))) + time.Sleep(time.Duration(ms) * time.Millisecond) + return textResult(fmt.Sprintf("waited %dms", ms)), nil + + case "screenshot": + return captureScreenshotResult(nil) + + case "zoom": + return captureScreenshotResult(input.Region) + + default: + return errorResult(fmt.Sprintf("unsupported action: %s", input.Action)), nil + } +} + +func textResult(text string) []fantasy.ToolResultOutputContent { + return []fantasy.ToolResultOutputContent{ + fantasy.ToolResultOutputContentText{Text: text}, + } +} + +func errorResult(text string) []fantasy.ToolResultOutputContent { + return []fantasy.ToolResultOutputContent{ + fantasy.ToolResultOutputContentError{Error: fmt.Errorf("%s", text)}, + } +} + +// captureScreenshotResult takes a screenshot and returns it as a tool +// result containing a base64-encoded PNG image. +func captureScreenshotResult(region *[4]int) ([]fantasy.ToolResultOutputContent, error) { + tw, th := computeScaledSize(defaultWidth, defaultHeight) + + args := []string{ + "screenshot", + "--json", + "--target-width", strconv.Itoa(tw), + "--target-height", strconv.Itoa(th), + } + + if region != nil { + left := max(0, min(region[0], region[2])) + top := max(0, min(region[1], region[3])) + right := min(defaultWidth, max(region[0], region[2])) + bottom := min(defaultHeight, max(region[1], region[3])) + w := right - left + h := bottom - top + if w > 0 && h > 0 { + args = append(args, + "--x", strconv.Itoa(left), + "--y", strconv.Itoa(top), + "--width", strconv.Itoa(w), + "--height", strconv.Itoa(h), + "--scale-to-geometry", + ) + } + } + + args = append(args, "--timeout-ms", strconv.Itoa(defaultScreenshotToMS)) + + out, err := pdExec(args...) + if err != nil { + return errorResult(fmt.Sprintf("screenshot: %v", err)), nil + } + + var result struct { + Data string `json:"data"` + } + if err := json.Unmarshal([]byte(strings.TrimSpace(out)), &result); err != nil { + return errorResult(fmt.Sprintf("parse screenshot: %v", err)), nil + } + + // Decode to verify, then return as base64 media content. + if _, err := base64.StdEncoding.DecodeString(result.Data); err != nil { + return errorResult(fmt.Sprintf("invalid base64 screenshot: %v", err)), nil + } + + return []fantasy.ToolResultOutputContent{ + fantasy.ToolResultOutputContentMedia{ + Data: result.Data, + MediaType: "image/png", + }, + }, nil +} + +// --------------------------------------------------------------------------- +// Agent loop — drives model.Generate with tool results fed back +// --------------------------------------------------------------------------- + +func saveMessages(path string, messages fantasy.Prompt) { + data, err := json.MarshalIndent(messages, "", " ") + if err != nil { + fmt.Fprintf(os.Stderr, "warning: failed to marshal messages: %v\n", err) + return + } + if err := os.WriteFile(path, data, 0o644); err != nil { + fmt.Fprintf(os.Stderr, "warning: failed to write messages: %v\n", err) + } +} + +func runAgentLoop(ctx context.Context, model fantasy.LanguageModel, computerTool fantasy.ProviderDefinedTool, prompt string, maxSteps int, messagesPath string) error { + systemMsg := fantasy.NewSystemMessage( + "Use the computer tool to complete the user prompt in the already-open browser window. " + + "Prefer direct actions and keep steps concise. Do not ask any questions, just perform the task.", + ) + + messages := fantasy.Prompt{ + systemMsg, + fantasy.NewUserMessage(prompt), + } + + tools := []fantasy.Tool{computerTool} + + saveMessages(messagesPath, messages) + + for step := 0; step < maxSteps; step++ { + if ctx.Err() != nil { + return ctx.Err() + } + + resp, err := model.Generate(ctx, fantasy.Call{ + Prompt: messages, + Tools: tools, + }) + if err != nil { + return fmt.Errorf("generate (step %d): %w", step, err) + } + + // Collect tool calls and any text from the response. + var toolCalls []fantasy.ToolCallContent + for _, c := range resp.Content { + switch c.GetType() { + case fantasy.ContentTypeText: + if tc, ok := fantasy.AsContentType[fantasy.TextContent](c); ok && tc.Text != "" { + fmt.Print(tc.Text) + } + case fantasy.ContentTypeToolCall: + if tc, ok := fantasy.AsContentType[fantasy.ToolCallContent](c); ok { + toolCalls = append(toolCalls, tc) + } + } + } + + // If no tool calls, the model is done. + if len(toolCalls) == 0 { + fmt.Println() + return nil + } + + // Build assistant message with the tool calls. + var assistantParts []fantasy.MessagePart + for _, c := range resp.Content { + switch c.GetType() { + case fantasy.ContentTypeText: + if tc, ok := fantasy.AsContentType[fantasy.TextContent](c); ok { + assistantParts = append(assistantParts, fantasy.TextPart{Text: tc.Text}) + } + case fantasy.ContentTypeToolCall: + if tc, ok := fantasy.AsContentType[fantasy.ToolCallContent](c); ok { + assistantParts = append(assistantParts, fantasy.ToolCallPart{ + ToolCallID: tc.ToolCallID, + ToolName: tc.ToolName, + Input: tc.Input, + }) + } + } + } + messages = append(messages, fantasy.Message{ + Role: fantasy.MessageRoleAssistant, + Content: assistantParts, + }) + + // Execute each tool call and build tool result messages. + var toolResultParts []fantasy.MessagePart + for _, tc := range toolCalls { + fmt.Fprintf(os.Stderr, " [step %d] tool: %s (id=%s)\n", step, tc.ToolName, tc.ToolCallID) + + var action computerAction + if err := json.Unmarshal([]byte(tc.Input), &action); err != nil { + toolResultParts = append(toolResultParts, fantasy.ToolResultPart{ + ToolCallID: tc.ToolCallID, + Output: fantasy.ToolResultOutputContentText{Text: fmt.Sprintf("invalid input: %v", err)}, + }) + continue + } + + fmt.Fprintf(os.Stderr, " [step %d] action: %s\n", step, action.Action) + + results, err := executeComputerAction(action) + if err != nil { + toolResultParts = append(toolResultParts, fantasy.ToolResultPart{ + ToolCallID: tc.ToolCallID, + Output: fantasy.ToolResultOutputContentText{Text: fmt.Sprintf("error: %v", err)}, + }) + continue + } + + // Use the first result part as the output. + if len(results) > 0 { + toolResultParts = append(toolResultParts, fantasy.ToolResultPart{ + ToolCallID: tc.ToolCallID, + Output: results[0], + }) + } + } + + messages = append(messages, fantasy.Message{ + Role: fantasy.MessageRoleTool, + Content: toolResultParts, + }) + + saveMessages(messagesPath, messages) + + // If the model didn't finish because of tool calls, stop. + if resp.FinishReason != fantasy.FinishReasonToolCalls { + return nil + } + } + + fmt.Fprintf(os.Stderr, "reached max steps (%d)\n", maxSteps) + return nil +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +func main() { + flag.Parse() + loadEnvLocal() + + if os.Getenv("ANTHROPIC_API_KEY") == "" { + fmt.Fprintln(os.Stderr, "ANTHROPIC_API_KEY is missing. Set it in environment or .env.local at repo root.") + os.Exit(1) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle SIGINT/SIGTERM for graceful shutdown. + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + sig := <-sigCh + fmt.Fprintf(os.Stderr, "\nreceived %s, shutting down...\n", sig) + cancel() + }() + + fmt.Println("starting portable desktop...") + session, err := startDesktop( + fmt.Sprintf("%dx%d", defaultWidth, defaultHeight), + "#1f252f", + ) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + defer session.stop() + + fmt.Printf("display :%d vnc :%d geometry %s\n", + session.info.Display, session.info.VNCPort, session.info.Geometry) + + // Start recording. + tmpDir := filepath.Join("tmp") + _ = os.MkdirAll(tmpDir, 0o755) + recordingPath, _ := filepath.Abs( + filepath.Join(tmpDir, fmt.Sprintf("agent-%d.mp4", time.Now().UnixMilli())), + ) + recording := startRecording(recordingPath) + fmt.Printf("recording: %s\n", recordingPath) + + // Start the viewer. + viewerCmd := startViewer(defaultViewerPort) + viewerURL := fmt.Sprintf("http://127.0.0.1:%d", defaultViewerPort) + fmt.Printf("viewer: %s\n", viewerURL) + openHostBrowser(viewerURL) + + // Let the desktop settle, then launch a browser inside it. + time.Sleep(1500 * time.Millisecond) + launchDesktopBrowser("about:blank") + time.Sleep(2000 * time.Millisecond) + + // Set up the Anthropic provider and model. + provider, err := anthropic.New(anthropic.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY"))) + if err != nil { + fmt.Fprintf(os.Stderr, "could not create provider: %v\n", err) + os.Exit(1) + } + + model, err := provider.LanguageModel(ctx, *flagModel) + if err != nil { + fmt.Fprintf(os.Stderr, "could not get language model: %v\n", err) + os.Exit(1) + } + + // Create the computer use tool (provider-defined) for the model. + displayNum := int64(session.info.Display) + enableZoom := true + computerTool := anthropic.NewComputerUseTool(anthropic.ComputerUseToolOptions{ + DisplayWidthPx: int64(defaultWidth), + DisplayHeightPx: int64(defaultHeight), + DisplayNumber: &displayNum, + EnableZoom: &enableZoom, + ToolVersion: anthropic.ComputerUse20251124, + }) + + fmt.Printf("provider: anthropic model: %s max steps: %d\n", *flagModel, *flagMaxSteps) + fmt.Printf("prompt: %q\n\n", *flagPrompt) + fmt.Println("agent output (streaming):") + + // Derive messages log path from the recording path (same base, .json). + messagesPath := strings.TrimSuffix(recordingPath, filepath.Ext(recordingPath)) + ".json" + fmt.Printf("messages: %s\n", messagesPath) + + if err := runAgentLoop(ctx, model, computerTool, *flagPrompt, *flagMaxSteps, messagesPath); err != nil { + fmt.Fprintf(os.Stderr, "agent loop failed: %v\n", err) + } + + // Finalize. + recording.stop() + fmt.Printf("\nsaved recording: %s\n", recordingPath) + + if viewerCmd.Process != nil { + _ = viewerCmd.Process.Kill() + } + + openHostBrowser("file://" + recordingPath) + fmt.Printf("opened recording: file://%s\n", recordingPath) +}