browser: add window resize tool with chromedp
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s68bdc7fb84309a7ck
diff --git a/claudetool/browse/README.md b/claudetool/browse/README.md
index 6c03e1a..7cc3437 100644
--- a/claudetool/browse/README.md
+++ b/claudetool/browse/README.md
@@ -14,6 +14,7 @@
6. `browser_eval` - Evaluate JavaScript in the browser context
7. `browser_screenshot` - Take a screenshot of the page or a specific element
8. `browser_scroll_into_view` - Scroll an element into view
+9. `browser_resize` - Resize the browser window to specific dimensions
## Usage
diff --git a/claudetool/browse/browse.go b/claudetool/browse/browse.go
index a5a1621..8a72390 100644
--- a/claudetool/browse/browse.go
+++ b/claudetool/browse/browse.go
@@ -655,6 +655,71 @@
return llm.TextContent(successResponse()), nil
}
+// resizeInput is the JSON payload accepted by the browser_resize tool.
+type resizeInput struct {
+ Width int `json:"width"` // requested width in pixels; resizeRun rejects values <= 0
+ Height int `json:"height"` // requested height in pixels; resizeRun rejects values <= 0
+ Timeout string `json:"timeout,omitempty"` // optional Go duration string, passed to parseTimeout
+}
+
+// NewResizeTool returns the "browser_resize" tool: required integer width/height, optional timeout string, executed by b.resizeRun.
+func (b *BrowseTools) NewResizeTool() *llm.Tool {
+ return &llm.Tool{
+ Name: "browser_resize",
+ Description: "Resize the browser window to a specific width and height",
+ InputSchema: json.RawMessage(`{
+ "type": "object",
+ "properties": {
+ "width": {
+ "type": "integer",
+ "description": "Window width in pixels"
+ },
+ "height": {
+ "type": "integer",
+ "description": "Window height in pixels"
+ },
+ "timeout": {
+ "type": "string",
+ "description": "Timeout as a Go duration string (default: 5s)"
+ }
+ },
+ "required": ["width", "height"]
+ }`),
+ Run: b.resizeRun,
+ }
+}
+
+// resizeRun handles a browser_resize call: it validates the requested size, then applies it with chromedp.EmulateViewport.
+func (b *BrowseTools) resizeRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
+ var input resizeInput
+ if err := json.Unmarshal(m, &input); err != nil {
+ return llm.TextContent(errorResponse(fmt.Errorf("invalid input: %w", err))), nil
+ }
+ // Reject bad dimensions first so invalid input never acquires a browser context.
+ if input.Width <= 0 || input.Height <= 0 {
+ return llm.TextContent(errorResponse(fmt.Errorf("invalid dimensions: width and height must be positive"))), nil
+ }
+
+ browserCtx, err := b.GetBrowserContext()
+ if err != nil {
+ return llm.TextContent(errorResponse(err)), nil
+ }
+
+ // Bound the operation with the requested timeout (parseTimeout interprets input.Timeout).
+ timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
+ defer cancel()
+
+ // Apply the size through viewport emulation; failures are returned as tool output, not as a Go error.
+ err = chromedp.Run(timeoutCtx,
+ chromedp.EmulateViewport(int64(input.Width), int64(input.Height)),
+ )
+ if err != nil {
+ return llm.TextContent(errorResponse(err)), nil
+ }
+
+ return llm.TextContent(successResponse()), nil
+}
+
// GetTools returns browser tools, optionally filtering out screenshot-related tools
func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
tools := []*llm.Tool{
@@ -665,6 +730,7 @@
b.NewGetTextTool(),
b.NewEvalTool(),
b.NewScrollIntoViewTool(),
+ b.NewResizeTool(),
}
// Add screenshot-related tools if supported
diff --git a/claudetool/browse/browse_test.go b/claudetool/browse/browse_test.go
index ae4fefd..7cffa0e 100644
--- a/claudetool/browse/browse_test.go
+++ b/claudetool/browse/browse_test.go
@@ -12,6 +12,7 @@
"time"
"github.com/chromedp/chromedp"
+ "github.com/stretchr/testify/require"
"sketch.dev/llm"
)
@@ -72,7 +73,7 @@
// Test with screenshot tools included
t.Run("with screenshots", func(t *testing.T) {
toolsWithScreenshots := tools.GetTools(true)
- if len(toolsWithScreenshots) != 9 {
- t.Errorf("expected 9 tools with screenshots, got %d", len(toolsWithScreenshots))
+ if len(toolsWithScreenshots) != 10 {
+ t.Errorf("expected 10 tools with screenshots, got %d", len(toolsWithScreenshots))
}
@@ -87,7 +88,7 @@
// Test without screenshot tools
t.Run("without screenshots", func(t *testing.T) {
noScreenshotTools := tools.GetTools(false)
- if len(noScreenshotTools) != 7 {
- t.Errorf("expected 7 tools without screenshots, got %d", len(noScreenshotTools))
+ if len(noScreenshotTools) != 8 {
+ t.Errorf("expected 8 tools without screenshots, got %d", len(noScreenshotTools))
}
})
@@ -304,3 +305,42 @@
t.Errorf("Expected Data in second content")
}
}
+
+// TestResizeTool checks that browser_resize changes the viewport size the page observes.
+func TestResizeTool(t *testing.T) {
+ // Skip before allocating anything when the environment opts out of browser tests.
+ if os.Getenv("CI") != "" || os.Getenv("HEADLESS_TEST") != "" {
+ t.Skip("Skipping browser test in CI/headless environment")
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+ defer cancel()
+
+ t.Run("ResizeWindow", func(t *testing.T) {
+ tools := NewBrowseTools(ctx)
+ defer tools.Close()
+
+ // Resize to mobile dimensions
+ resizeTool := tools.NewResizeTool()
+ input := json.RawMessage(`{"width": 375, "height": 667}`)
+ content, err := resizeTool.Run(ctx, input)
+ require.NoError(t, err)
+ require.Contains(t, content[0].Text, "success")
+
+ // Navigate to a test page so the dimensions are read from a real document
+ navInput := json.RawMessage(`{"url": "https://example.com"}`)
+ content, err = tools.NewNavigateTool().Run(ctx, navInput)
+ require.NoError(t, err)
+ require.Contains(t, content[0].Text, "success")
+
+ // Check dimensions via JavaScript
+ evalInput := json.RawMessage(`{"expression": "({width: window.innerWidth, height: window.innerHeight})"}`)
+ content, err = tools.NewEvalTool().Run(ctx, evalInput)
+ require.NoError(t, err)
+
+ // The expression always yields "width"/"height" keys, so assert on the values instead;
+ // NOTE(review): assumes the emulated viewport is reported exactly by innerWidth/innerHeight.
+ require.Contains(t, content[0].Text, "375")
+ require.Contains(t, content[0].Text, "667")
+ })
+}
diff --git a/claudetool/browse/browser_resize.go b/claudetool/browse/browser_resize.go
new file mode 100644
index 0000000..557ea66
--- /dev/null
+++ b/claudetool/browse/browser_resize.go
@@ -0,0 +1,2 @@
+// Package browse contains browser automation tools.
+package browse
diff --git a/go.mod b/go.mod
index b8073b1..462a675 100644
--- a/go.mod
+++ b/go.mod
@@ -16,6 +16,7 @@
github.com/pkg/sftp v1.13.9
github.com/richardlehane/crock32 v1.0.1
github.com/sashabaranov/go-openai v1.38.2
+ github.com/stretchr/testify v1.10.0
go.skia.org/infra v0.0.0-20250421160028-59e18403fd4a
golang.org/x/crypto v0.37.0
golang.org/x/net v0.39.0
@@ -29,6 +30,7 @@
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
github.com/chromedp/cdproto v0.0.0-20250403032234-65de8f5d025b // indirect
github.com/chromedp/sysutil v1.1.0 // indirect
+ github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-json-experiment/json v0.0.0-20250211171154-1ae217ad3535 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
@@ -36,9 +38,11 @@
github.com/kr/fs v0.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
+ github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/text v0.24.0 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
)
tool golang.org/x/tools/cmd/stringer
diff --git a/go.sum b/go.sum
index 65d5385..1e248df 100644
--- a/go.sum
+++ b/go.sum
@@ -154,6 +154,7 @@
golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU=
golang.org/x/tools v0.32.0/go.mod h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/loop/testdata/agent_loop.httprr b/loop/testdata/agent_loop.httprr
index fe631a1..81627b6 100644
--- a/loop/testdata/agent_loop.httprr
+++ b/loop/testdata/agent_loop.httprr
@@ -1,9 +1,9 @@
httprr trace v1
-15608 2329
+16161 2332
POST https://api.anthropic.com/v1/messages HTTP/1.1
Host: api.anthropic.com
User-Agent: Go-http-client/1.1
-Content-Length: 15410
+Content-Length: 15963
Anthropic-Version: 2023-06-01
Content-Type: application/json
@@ -390,6 +390,31 @@
}
},
{
+ "name": "browser_resize",
+ "description": "Resize the browser window to a specific width and height",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "width": {
+ "type": "integer",
+ "description": "Window width in pixels"
+ },
+ "height": {
+ "type": "integer",
+ "description": "Window height in pixels"
+ },
+ "timeout": {
+ "type": "string",
+ "description": "Timeout as a Go duration string (default: 5s)"
+ }
+ },
+ "required": [
+ "width",
+ "height"
+ ]
+ }
+ },
+ {
"name": "browser_take_screenshot",
"description": "Take a screenshot of the page or a specific element",
"input_schema": {
@@ -496,24 +521,24 @@
Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
Anthropic-Ratelimit-Input-Tokens-Limit: 200000
Anthropic-Ratelimit-Input-Tokens-Remaining: 199000
-Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-11T01:21:43Z
+Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-11T01:26:00Z
Anthropic-Ratelimit-Output-Tokens-Limit: 80000
Anthropic-Ratelimit-Output-Tokens-Remaining: 80000
-Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-11T01:21:47Z
+Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-11T01:26:04Z
Anthropic-Ratelimit-Requests-Limit: 4000
Anthropic-Ratelimit-Requests-Remaining: 3999
-Anthropic-Ratelimit-Requests-Reset: 2025-05-11T01:21:42Z
+Anthropic-Ratelimit-Requests-Reset: 2025-05-11T01:25:59Z
Anthropic-Ratelimit-Tokens-Limit: 280000
Anthropic-Ratelimit-Tokens-Remaining: 279000
-Anthropic-Ratelimit-Tokens-Reset: 2025-05-11T01:21:43Z
+Anthropic-Ratelimit-Tokens-Reset: 2025-05-11T01:26:00Z
Cf-Cache-Status: DYNAMIC
-Cf-Ray: 93dddc70ba53942c-SJC
+Cf-Ray: 93dde2b41c34cf2b-SJC
Content-Type: application/json
-Date: Sun, 11 May 2025 01:21:47 GMT
-Request-Id: req_011CNzsCMP4tG7GTL8PrNTDw
+Date: Sun, 11 May 2025 01:26:04 GMT
+Request-Id: req_011CNzsXGD3jV7cnpYvABKN2
Server: cloudflare
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
Via: 1.1 google
X-Robots-Tag: none
-{"id":"msg_0152wEWVypxw8wDR31nYSXQd","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"text","text":"Here are the tools available to me:\n\n1. bash - Execute shell commands\n2. keyword_search - Search for files based on keywords\n3. think - Record thoughts and plans\n4. title - Set conversation title\n5. precommit - Create a git branch for work\n6. done - Mark a task as complete with checklist\n7. codereview - Run automated code review\n8. multiplechoice - Present multiple choice options to user\n9. Browser tools:\n - browser_navigate - Navigate to URLs\n - browser_click - Click elements\n - browser_type - Type into elements\n - browser_wait_for - Wait for elements\n - browser_get_text - Get text from elements\n - browser_eval - Evaluate JavaScript\n - browser_scroll_into_view - Scroll elements into view\n - browser_take_screenshot - Take screenshots\n - browser_read_image - Read image files\n10. patch - Make precise text edits to files\n\nThese tools allow me to execute commands, search codebases, plan, manage git branches, evaluate code, interact with web browsers, and make file modifications."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3609,"cache_read_input_tokens":0,"output_tokens":254}}
\ No newline at end of file
+{"id":"msg_01Q6jfLzpKJGDUmGXgP2w3FQ","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"text","text":"Here's a brief list of the tools available to me:\n\n1. bash - Execute shell commands\n2. keyword_search - Search for files using keywords\n3. think - Record thoughts, notes, and plans\n4. title - Set conversation title\n5. precommit - Create a git branch for tracking work\n6. done - Mark completion of user's goal with checklist\n7. codereview - Run automated code review\n8. multiplechoice - Present user with quick answer options\n9. Browser tools:\n - browser_navigate - Go to URL\n - browser_click - Click element\n - browser_type - Type text\n - browser_wait_for - Wait for element\n - browser_get_text - Get text from element\n - browser_eval - Run JavaScript\n - browser_scroll_into_view - Scroll element into view\n - browser_resize - Resize browser window\n - browser_take_screenshot - Take screenshot\n - browser_read_image - Read image file\n10. patch - Make precise text edits to files\n\nThese tools allow me to explore, analyze, modify code, interact with web pages, and help complete your tasks."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3727,"cache_read_input_tokens":0,"output_tokens":262}}
\ No newline at end of file