claudetool: streamline browser tools
browser_click, browser_type, browser_get_text, browser_scroll_into_view,
browser_resize, and browser_wait_for can all be easily implemented using browser_eval,
if browser_eval is given an await parameter.
A bit of testing suggests that they are more robust in practice
that way, and that multiple tool calls can be combined
into a single browser_eval call, which reduces latency.
And Sketch does in fact use browser_eval this way as needed.
Also, bump up timeouts; empirically, 5 seconds is not enough.
As a bonus, fewer tools is better for context management.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s8d8cd418f3e97f26k
diff --git a/claudetool/browse/README.md b/claudetool/browse/README.md
index 69ebf07..74da1bb 100644
--- a/claudetool/browse/README.md
+++ b/claudetool/browse/README.md
@@ -7,14 +7,8 @@
## Available Tools
1. `browser_navigate` - Navigate to a URL and wait for the page to load
-2. `browser_click` - Click an element matching a CSS selector
-3. `browser_type` - Type text into an input field
-4. `browser_wait_for` - Wait for an element to appear in the DOM
-5. `browser_get_text` - Get the text content of an element
-6. `browser_eval` - Evaluate JavaScript in the browser context
-7. `browser_screenshot` - Take a screenshot of the page or a specific element
-8. `browser_scroll_into_view` - Scroll an element into view
-9. `browser_resize` - Resize the browser window to specific dimensions
+2. `browser_eval` - Evaluate JavaScript in the browser context
+3. `browser_screenshot` - Take a screenshot of the page or a specific element
## Usage
diff --git a/claudetool/browse/browse.go b/claudetool/browse/browse.go
index 577a5a9..81ae105 100644
--- a/claudetool/browse/browse.go
+++ b/claudetool/browse/browse.go
@@ -173,7 +173,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
},
"required": ["url"]
@@ -212,264 +212,19 @@
return llm.ToolOut{LLMContent: llm.TextContent("done")}
}
-// ClickTool definition
-type clickInput struct {
- Selector string `json:"selector"`
- WaitVisible bool `json:"wait_visible,omitempty"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewClickTool creates a tool for clicking elements
-func (b *BrowseTools) NewClickTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_click",
- Description: "Click the first element matching a CSS selector",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to click"
- },
- "wait_visible": {
- "type": "boolean",
- "description": "Wait for the element to be visible before clicking"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["selector"]
- }`),
- Run: b.clickRun,
- }
-}
-
-func (b *BrowseTools) clickRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input clickInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- // Create a timeout context for this operation
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- actions := []chromedp.Action{
- chromedp.WaitReady(input.Selector),
- }
-
- if input.WaitVisible {
- actions = append(actions, chromedp.WaitVisible(input.Selector))
- }
-
- actions = append(actions, chromedp.Click(input.Selector))
-
- err = chromedp.Run(timeoutCtx, actions...)
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("done")}
-}
-
-// TypeTool definition
-type typeInput struct {
- Selector string `json:"selector"`
- Text string `json:"text"`
- Clear bool `json:"clear,omitempty"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewTypeTool creates a tool for typing into input elements
-func (b *BrowseTools) NewTypeTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_type",
- Description: "Type text into an input or textarea element",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the input element"
- },
- "text": {
- "type": "string",
- "description": "Text to type into the element"
- },
- "clear": {
- "type": "boolean",
- "description": "Clear the input field before typing"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["selector", "text"]
- }`),
- Run: b.typeRun,
- }
-}
-
-func (b *BrowseTools) typeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input typeInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- // Create a timeout context for this operation
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- actions := []chromedp.Action{
- chromedp.WaitReady(input.Selector),
- chromedp.WaitVisible(input.Selector),
- }
-
- if input.Clear {
- actions = append(actions, chromedp.Clear(input.Selector))
- }
-
- actions = append(actions, chromedp.SendKeys(input.Selector, input.Text))
-
- err = chromedp.Run(timeoutCtx, actions...)
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("done")}
-}
-
-// WaitForTool definition
-type waitForInput struct {
- Selector string `json:"selector"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewWaitForTool creates a tool for waiting for elements
-func (b *BrowseTools) NewWaitForTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_wait_for",
- Description: "Wait for an element to be present in the DOM",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to wait for"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["selector"]
- }`),
- Run: b.waitForRun,
- }
-}
-
-func (b *BrowseTools) waitForRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input waitForInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- err = chromedp.Run(timeoutCtx, chromedp.WaitReady(input.Selector))
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("done")}
-}
-
-// GetTextTool definition
-type getTextInput struct {
- Selector string `json:"selector"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewGetTextTool creates a tool for getting text from elements
-func (b *BrowseTools) NewGetTextTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_get_text",
- Description: "Get the innerText of an element, returned in innerText tag. Can be used to read the web page.",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to get text from"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["selector"]
- }`),
- Run: b.getTextRun,
- }
-}
-
-func (b *BrowseTools) getTextRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input getTextInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- // Create a timeout context for this operation
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- var text string
- err = chromedp.Run(timeoutCtx,
- chromedp.WaitReady(input.Selector),
- chromedp.Text(input.Selector, &text),
- )
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("<innerText>" + text + "</innerText>")}
-}
-
// EvalTool definition
type evalInput struct {
Expression string `json:"expression"`
Timeout string `json:"timeout,omitempty"`
+ Await *bool `json:"await,omitempty"`
}
// NewEvalTool creates a tool for evaluating JavaScript
func (b *BrowseTools) NewEvalTool() *llm.Tool {
return &llm.Tool{
- Name: "browser_eval",
- Description: "Evaluate JavaScript in the browser context",
+ Name: "browser_eval",
+ Description: `Evaluate JavaScript in the browser context.
+Your go-to tool for interacting with content: clicking buttons, typing, getting content, scrolling, resizing, waiting for content/selector to be ready, etc.`,
InputSchema: json.RawMessage(`{
"type": "object",
"properties": {
@@ -479,7 +234,11 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
+ },
+ "await": {
+ "type": "boolean",
+ "description": "If true, wait for promises to resolve and return their resolved value (default: true)"
}
},
"required": ["expression"]
@@ -504,7 +263,21 @@
defer cancel()
var result any
- err = chromedp.Run(timeoutCtx, chromedp.Evaluate(input.Expression, &result))
+ var evalOps []chromedp.EvaluateOption
+
+ await := true
+ if input.Await != nil {
+ await = *input.Await
+ }
+ if await {
+ evalOps = append(evalOps, func(p *runtime.EvaluateParams) *runtime.EvaluateParams {
+ return p.WithAwaitPromise(true)
+ })
+ }
+
+ evalAction := chromedp.Evaluate(input.Expression, &result, evalOps...)
+
+ err = chromedp.Run(timeoutCtx, evalAction)
if err != nil {
return llm.ErrorToolOut(err)
}
@@ -538,7 +311,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
}
}`),
@@ -606,151 +379,11 @@
}}
}
-// ScrollIntoViewTool definition
-type scrollIntoViewInput struct {
- Selector string `json:"selector"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewScrollIntoViewTool creates a tool for scrolling elements into view
-func (b *BrowseTools) NewScrollIntoViewTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_scroll_into_view",
- Description: "Scroll an element into view if it's not visible",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to scroll into view"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["selector"]
- }`),
- Run: b.scrollIntoViewRun,
- }
-}
-
-func (b *BrowseTools) scrollIntoViewRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input scrollIntoViewInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- // Create a timeout context for this operation
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- script := fmt.Sprintf(`
- const el = document.querySelector('%s');
- if (el) {
- el.scrollIntoView({behavior: 'smooth', block: 'center'});
- return true;
- }
- return false;
- `, input.Selector)
-
- var result bool
- err = chromedp.Run(timeoutCtx,
- chromedp.WaitReady(input.Selector),
- chromedp.Evaluate(script, &result),
- )
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- if !result {
- return llm.ErrorToolOut(fmt.Errorf("element not found: %s", input.Selector))
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("done")}
-}
-
-// ResizeTool definition
-type resizeInput struct {
- Width int `json:"width"`
- Height int `json:"height"`
- Timeout string `json:"timeout,omitempty"`
-}
-
-// NewResizeTool creates a tool for resizing the browser window
-func (b *BrowseTools) NewResizeTool() *llm.Tool {
- return &llm.Tool{
- Name: "browser_resize",
- Description: "Resize the browser window to a specific width and height",
- InputSchema: json.RawMessage(`{
- "type": "object",
- "properties": {
- "width": {
- "type": "integer",
- "description": "Window width in pixels"
- },
- "height": {
- "type": "integer",
- "description": "Window height in pixels"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": ["width", "height"]
- }`),
- Run: b.resizeRun,
- }
-}
-
-func (b *BrowseTools) resizeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
- var input resizeInput
- if err := json.Unmarshal(m, &input); err != nil {
- return llm.ErrorfToolOut("invalid input: %w", err)
- }
-
- browserCtx, err := b.GetBrowserContext()
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- // Create a timeout context for this operation
- timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
- defer cancel()
-
- // Validate dimensions
- if input.Width <= 0 || input.Height <= 0 {
- return llm.ErrorToolOut(fmt.Errorf("invalid dimensions: width and height must be positive"))
- }
-
- // Resize the browser window
- err = chromedp.Run(timeoutCtx,
- chromedp.EmulateViewport(int64(input.Width), int64(input.Height)),
- )
- if err != nil {
- return llm.ErrorToolOut(err)
- }
-
- return llm.ToolOut{LLMContent: llm.TextContent("done")}
-}
-
// GetTools returns browser tools, optionally filtering out screenshot-related tools
func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
tools := []*llm.Tool{
b.NewNavigateTool(),
- b.NewClickTool(),
- b.NewTypeTool(),
- b.NewWaitForTool(),
- b.NewGetTextTool(),
b.NewEvalTool(),
- b.NewScrollIntoViewTool(),
- b.NewResizeTool(),
b.NewRecentConsoleLogsTool(),
b.NewClearConsoleLogsTool(),
}
@@ -809,7 +442,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
},
"required": ["path"]
@@ -861,16 +494,10 @@
// parseTimeout parses a timeout string and returns a time.Duration
-// It returns a default of 5 seconds if the timeout is empty or invalid
+// It returns a default of 15 seconds if the timeout is empty or invalid
func parseTimeout(timeout string) time.Duration {
- if timeout == "" {
- return 5 * time.Second // default 5 seconds
- }
-
dur, err := time.ParseDuration(timeout)
if err != nil {
- // If parsing fails, return the default
- return 5 * time.Second
+ return 15 * time.Second
}
-
return dur
}
diff --git a/claudetool/browse/browse_test.go b/claudetool/browse/browse_test.go
index d1b1eec..def464b 100644
--- a/claudetool/browse/browse_test.go
+++ b/claudetool/browse/browse_test.go
@@ -30,13 +30,8 @@
requiredProps []string
}{
{tools.NewNavigateTool(), "browser_navigate", "Navigate", []string{"url"}},
- {tools.NewClickTool(), "browser_click", "Click", []string{"selector"}},
- {tools.NewTypeTool(), "browser_type", "Type", []string{"selector", "text"}},
- {tools.NewWaitForTool(), "browser_wait_for", "Wait", []string{"selector"}},
- {tools.NewGetTextTool(), "browser_get_text", "Get", []string{"selector"}},
{tools.NewEvalTool(), "browser_eval", "Evaluate", []string{"expression"}},
{tools.NewScreenshotTool(), "browser_take_screenshot", "Take", nil},
- {tools.NewScrollIntoViewTool(), "browser_scroll_into_view", "Scroll", []string{"selector"}},
}
for _, tt := range toolTests {
@@ -78,8 +73,8 @@
// Test with screenshot tools included
t.Run("with screenshots", func(t *testing.T) {
toolsWithScreenshots := tools.GetTools(true)
- if len(toolsWithScreenshots) != 12 {
- t.Errorf("expected 12 tools with screenshots, got %d", len(toolsWithScreenshots))
+ if len(toolsWithScreenshots) != 6 {
+ t.Errorf("expected 6 tools with screenshots, got %d", len(toolsWithScreenshots))
}
// Check tool naming convention
@@ -94,8 +89,8 @@
// Test without screenshot tools
t.Run("without screenshots", func(t *testing.T) {
noScreenshotTools := tools.GetTools(false)
- if len(noScreenshotTools) != 10 {
- t.Errorf("expected 10 tools without screenshots, got %d", len(noScreenshotTools))
+ if len(noScreenshotTools) != 4 {
+ t.Errorf("expected 4 tools without screenshots, got %d", len(noScreenshotTools))
}
})
}
@@ -382,61 +377,3 @@
t.Errorf("Expected default height %v, got %v", expectedHeight, response.Height)
}
}
-
-// TestResizeTool tests the browser resize functionality
-func TestResizeTool(t *testing.T) {
- ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
- defer cancel()
-
- // Skip if CI or headless testing environment
- if os.Getenv("CI") != "" || os.Getenv("HEADLESS_TEST") != "" {
- t.Skip("Skipping browser test in CI/headless environment")
- }
-
- t.Run("ResizeWindow", func(t *testing.T) {
- tools := NewBrowseTools(ctx)
- t.Cleanup(func() {
- tools.Close()
- })
-
- // Resize to mobile dimensions
- resizeTool := tools.NewResizeTool()
- input := json.RawMessage(`{"width": 375, "height": 667}`)
- toolOut := resizeTool.Run(ctx, input)
- if toolOut.Error != nil {
- t.Fatalf("Error: %v", toolOut.Error)
- }
- content := toolOut.LLMContent
- if !strings.Contains(content[0].Text, "done") {
- t.Fatalf("Expected done in response, got: %s", content[0].Text)
- }
-
- // Navigate to a test page and verify using JavaScript to get window dimensions
- navInput := json.RawMessage(`{"url": "https://example.com"}`)
- toolOut = tools.NewNavigateTool().Run(ctx, navInput)
- if toolOut.Error != nil {
- t.Fatalf("Error: %v", toolOut.Error)
- }
- content = toolOut.LLMContent
- if !strings.Contains(content[0].Text, "done") {
- t.Fatalf("Expected done in response, got: %s", content[0].Text)
- }
-
- // Check dimensions via JavaScript
- evalInput := json.RawMessage(`{"expression": "({width: window.innerWidth, height: window.innerHeight})"}`)
- toolOut = tools.NewEvalTool().Run(ctx, evalInput)
- if toolOut.Error != nil {
- t.Fatalf("Error: %v", toolOut.Error)
- }
- content = toolOut.LLMContent
-
- // The dimensions might not be exactly what we set (browser chrome, etc.)
- // but they should be close
- if !strings.Contains(content[0].Text, "width") {
- t.Fatalf("Expected width in response, got: %s", content[0].Text)
- }
- if !strings.Contains(content[0].Text, "height") {
- t.Fatalf("Expected height in response, got: %s", content[0].Text)
- }
- })
-}
diff --git a/loop/testdata/agent_loop.httprr b/loop/testdata/agent_loop.httprr
index e485771..d3eeeae 100644
--- a/loop/testdata/agent_loop.httprr
+++ b/loop/testdata/agent_loop.httprr
@@ -1,9 +1,9 @@
httprr trace v1
-18043 2288
+15131 2320
POST https://api.anthropic.com/v1/messages HTTP/1.1
Host: api.anthropic.com
User-Agent: Go-http-client/1.1
-Content-Length: 17845
+Content-Length: 14933
Anthropic-Version: 2023-06-01
Content-Type: application/json
@@ -311,7 +311,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
},
"required": [
@@ -320,101 +320,8 @@
}
},
{
- "name": "browser_click",
- "description": "Click the first element matching a CSS selector",
- "input_schema": {
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to click"
- },
- "wait_visible": {
- "type": "boolean",
- "description": "Wait for the element to be visible before clicking"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "selector"
- ]
- }
- },
- {
- "name": "browser_type",
- "description": "Type text into an input or textarea element",
- "input_schema": {
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the input element"
- },
- "text": {
- "type": "string",
- "description": "Text to type into the element"
- },
- "clear": {
- "type": "boolean",
- "description": "Clear the input field before typing"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "selector",
- "text"
- ]
- }
- },
- {
- "name": "browser_wait_for",
- "description": "Wait for an element to be present in the DOM",
- "input_schema": {
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to wait for"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "selector"
- ]
- }
- },
- {
- "name": "browser_get_text",
- "description": "Get the innerText of an element, returned in innerText tag. Can be used to read the web page.",
- "input_schema": {
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to get text from"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "selector"
- ]
- }
- },
- {
"name": "browser_eval",
- "description": "Evaluate JavaScript in the browser context",
+ "description": "Evaluate JavaScript in the browser context.\nYour go-to tool for interacting with content: clicking buttons, typing, getting content, scrolling, resizing, waiting for content/selector to be ready, etc.",
"input_schema": {
"type": "object",
"properties": {
@@ -424,7 +331,11 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
+ },
+ "await": {
+ "type": "boolean",
+ "description": "If true, wait for promises to resolve and return their resolved value (default: true)"
}
},
"required": [
@@ -433,51 +344,6 @@
}
},
{
- "name": "browser_scroll_into_view",
- "description": "Scroll an element into view if it's not visible",
- "input_schema": {
- "type": "object",
- "properties": {
- "selector": {
- "type": "string",
- "description": "CSS selector for the element to scroll into view"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "selector"
- ]
- }
- },
- {
- "name": "browser_resize",
- "description": "Resize the browser window to a specific width and height",
- "input_schema": {
- "type": "object",
- "properties": {
- "width": {
- "type": "integer",
- "description": "Window width in pixels"
- },
- "height": {
- "type": "integer",
- "description": "Window height in pixels"
- },
- "timeout": {
- "type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
- }
- },
- "required": [
- "width",
- "height"
- ]
- }
- },
- {
"name": "browser_recent_console_logs",
"description": "Get recent browser console logs",
"input_schema": {
@@ -510,7 +376,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
}
}
@@ -527,7 +393,7 @@
},
"timeout": {
"type": "string",
- "description": "Timeout as a Go duration string (default: 5s)"
+ "description": "Timeout as a Go duration string (default: 15s)"
}
},
"required": [
@@ -549,24 +415,24 @@
Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
Anthropic-Ratelimit-Input-Tokens-Limit: 2000000
Anthropic-Ratelimit-Input-Tokens-Remaining: 2000000
-Anthropic-Ratelimit-Input-Tokens-Reset: 2025-07-31T04:47:48Z
+Anthropic-Ratelimit-Input-Tokens-Reset: 2025-07-31T04:56:24Z
Anthropic-Ratelimit-Output-Tokens-Limit: 400000
Anthropic-Ratelimit-Output-Tokens-Remaining: 400000
-Anthropic-Ratelimit-Output-Tokens-Reset: 2025-07-31T04:47:53Z
+Anthropic-Ratelimit-Output-Tokens-Reset: 2025-07-31T04:56:28Z
Anthropic-Ratelimit-Requests-Limit: 4000
Anthropic-Ratelimit-Requests-Remaining: 3999
-Anthropic-Ratelimit-Requests-Reset: 2025-07-31T04:47:47Z
+Anthropic-Ratelimit-Requests-Reset: 2025-07-31T04:56:23Z
Anthropic-Ratelimit-Tokens-Limit: 2400000
Anthropic-Ratelimit-Tokens-Remaining: 2400000
-Anthropic-Ratelimit-Tokens-Reset: 2025-07-31T04:47:48Z
+Anthropic-Ratelimit-Tokens-Reset: 2025-07-31T04:56:24Z
Cf-Cache-Status: DYNAMIC
-Cf-Ray: 967a75b2793ccf0e-SJC
+Cf-Ray: 967a824a7d62fa9e-SJC
Content-Type: application/json
-Date: Thu, 31 Jul 2025 04:47:53 GMT
-Request-Id: req_011CReVS1GWZPU1iZLUbAfLH
+Date: Thu, 31 Jul 2025 04:56:28 GMT
+Request-Id: req_011CReW62geX1vzU8VqNitZS
Server: cloudflare
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
Via: 1.1 google
X-Robots-Tag: none
-{"id":"msg_01MpXPbWAfaBn5voE1Fa5THr","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"text","text":"Here are the tools available to me:\n\n**File & Code Operations:**\n- `bash` - Execute shell commands\n- `patch` - Modify files with precise text edits\n- `keyword_search` - Search for files by content/concepts\n\n**Task Management:**\n- `todo_read` / `todo_write` - Track and manage task lists\n- `think` - Internal notes and planning\n\n**Development Workflow:**\n- `codereview` - Automated code review\n- `done` - Final checklist before completion\n\n**Browser Automation:**\n- `browser_navigate` - Navigate to URLs\n- `browser_click` / `browser_type` - Interact with elements\n- `browser_wait_for` / `browser_get_text` - Wait for and read content\n- `browser_eval` - Execute JavaScript\n- `browser_scroll_into_view` / `browser_resize` - View manipulation\n- `browser_take_screenshot` / `browser_recent_console_logs` - Debugging\n\n**System Info:**\n- `about_sketch` - Information about the Sketch environment\n- `read_image` - Read and encode image files"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":4135,"cache_read_input_tokens":0,"output_tokens":261,"service_tier":"standard"}}
\ No newline at end of file
+{"id":"msg_0151My8VR6BwVPfBkrvtw6ne","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"text","text":"Here are the tools available to me:\n\n**File & Code Management:**\n- `bash` - Execute shell commands\n- `patch` - Modify files with precise text edits\n- `keyword_search` - Search for files using keywords and queries\n\n**Task Management:**\n- `todo_read` - Read current todo list\n- `todo_write` - Create and manage task lists\n- `think` - Record thoughts and plans\n\n**Development Workflow:**\n- `done` - Mark work complete with verification checklist\n- `codereview` - Run automated code review\n- `about_sketch` - Get information about Sketch environment\n\n**Browser Automation:**\n- `browser_navigate` - Navigate to URLs\n- `browser_eval` - Execute JavaScript in browser\n- `browser_recent_console_logs` - Get console logs\n- `browser_clear_console_logs` - Clear console logs\n- `browser_take_screenshot` - Take screenshots\n- `read_image` - Read and encode image files\n\nThese tools enable autonomous coding, testing, file management, browser automation, and project organization."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":3505,"cache_read_input_tokens":0,"output_tokens":244,"service_tier":"standard"}}
\ No newline at end of file
diff --git a/termui/termui.go b/termui/termui.go
index 16883ca..b3b85a3 100644
--- a/termui/termui.go
+++ b/termui/termui.go
@@ -51,22 +51,10 @@
🐛 Running automated code review, may be slow
{{else if eq .msg.ToolName "browser_navigate" -}}
🌐 {{.input.url -}}
-{{else if eq .msg.ToolName "browser_click" -}}
- 🖱️ {{.input.selector -}}
-{{else if eq .msg.ToolName "browser_type" -}}
- ⌨️ {{.input.selector}}: "{{.input.text}}"
-{{else if eq .msg.ToolName "browser_wait_for" -}}
- ⏳ {{.input.selector -}}
-{{else if eq .msg.ToolName "browser_get_text" -}}
- 📖 {{.input.selector -}}
{{else if eq .msg.ToolName "browser_eval" -}}
📱 {{.input.expression -}}
{{else if eq .msg.ToolName "browser_take_screenshot" -}}
📸 Screenshot
-{{else if eq .msg.ToolName "browser_scroll_into_view" -}}
- 🔄 {{.input.selector -}}
-{{else if eq .msg.ToolName "browser_resize" -}}
- 🖼️ {{.input.width}}x{{.input.height -}}
{{else if eq .msg.ToolName "read_image" -}}
🖼️ {{.input.path -}}
{{else if eq .msg.ToolName "browser_recent_console_logs" -}}
diff --git a/webui/src/web-components/demo/sketch-tool-card.demo.ts b/webui/src/web-components/demo/sketch-tool-card.demo.ts
index 625e555..95fc184 100644
--- a/webui/src/web-components/demo/sketch-tool-card.demo.ts
+++ b/webui/src/web-components/demo/sketch-tool-card.demo.ts
@@ -17,7 +17,6 @@
"../sketch-tool-card-browser-resize.ts",
"../sketch-tool-card-browser-scroll-into-view.ts",
"../sketch-tool-card-browser-type.ts",
- "../sketch-tool-card-browser-wait-for.ts",
"../sketch-tool-card-read-image.ts",
"../sketch-tool-card-take-screenshot.ts",
],
@@ -202,39 +201,6 @@
tool_call_id: "toolu_navigate",
},
},
- // Browser click
- {
- name: "browser_click",
- input: JSON.stringify({ selector: ".login-button" }),
- tool_call_id: "toolu_click",
- result_message: {
- type: "tool",
- tool_result: "Clicked element: .login-button",
- tool_call_id: "toolu_click",
- },
- },
- // Browser type
- {
- name: "browser_type",
- input: JSON.stringify({ selector: "#username", text: "testuser" }),
- tool_call_id: "toolu_type",
- result_message: {
- type: "tool",
- tool_result: "Typed 'testuser' into #username",
- tool_call_id: "toolu_type",
- },
- },
- // Browser get text
- {
- name: "browser_get_text",
- input: JSON.stringify({ selector: ".welcome-message" }),
- tool_call_id: "toolu_get_text",
- result_message: {
- type: "tool",
- tool_result: "Welcome to our application! Please log in to continue.",
- tool_call_id: "toolu_get_text",
- },
- },
// Browser eval
{
name: "browser_eval",
@@ -246,39 +212,6 @@
tool_call_id: "toolu_eval",
},
},
- // Browser wait for
- {
- name: "browser_wait_for",
- input: JSON.stringify({ selector: ".loading-complete" }),
- tool_call_id: "toolu_wait",
- result_message: {
- type: "tool",
- tool_result: "Element .loading-complete is now present",
- tool_call_id: "toolu_wait",
- },
- },
- // Browser resize
- {
- name: "browser_resize",
- input: JSON.stringify({ width: 1024, height: 768 }),
- tool_call_id: "toolu_resize",
- result_message: {
- type: "tool",
- tool_result: "Browser resized to 1024x768",
- tool_call_id: "toolu_resize",
- },
- },
- // Browser scroll into view
- {
- name: "browser_scroll_into_view",
- input: JSON.stringify({ selector: "#bottom-section" }),
- tool_call_id: "toolu_scroll",
- result_message: {
- type: "tool",
- tool_result: "Scrolled element #bottom-section into view",
- tool_call_id: "toolu_scroll",
- },
- },
// Browser clear console logs
{
name: "browser_clear_console_logs",
@@ -377,35 +310,9 @@
"sketch-tool-card-browser-navigate",
);
break;
- case "browser_click":
- toolCardEl = document.createElement("sketch-tool-card-browser-click");
- break;
- case "browser_type":
- toolCardEl = document.createElement("sketch-tool-card-browser-type");
- break;
- case "browser_get_text":
- toolCardEl = document.createElement(
- "sketch-tool-card-browser-get-text",
- );
- break;
case "browser_eval":
toolCardEl = document.createElement("sketch-tool-card-browser-eval");
break;
- case "browser_wait_for":
- toolCardEl = document.createElement(
- "sketch-tool-card-browser-wait-for",
- );
- break;
- case "browser_resize":
- toolCardEl = document.createElement(
- "sketch-tool-card-browser-resize",
- );
- break;
- case "browser_scroll_into_view":
- toolCardEl = document.createElement(
- "sketch-tool-card-browser-scroll-into-view",
- );
- break;
case "browser_clear_console_logs":
toolCardEl = document.createElement(
"sketch-tool-card-browser-clear-console-logs",
diff --git a/webui/src/web-components/mobile-chat.ts b/webui/src/web-components/mobile-chat.ts
index 5bdd03a..89c0a1c 100644
--- a/webui/src/web-components/mobile-chat.ts
+++ b/webui/src/web-components/mobile-chat.ts
@@ -257,13 +257,6 @@
case "browser_take_screenshot":
return "Taking screenshot";
- case "browser_click":
- return `Click: ${input.selector || ""}`;
-
- case "browser_type":
- const text = input.text || "";
- return `Type: ${text.length > 30 ? text.substring(0, 30) + "..." : text}`;
-
case "todo_write":
const tasks = input.tasks || [];
return `${tasks.length} task${tasks.length > 1 ? "s" : ""}`;
diff --git a/webui/src/web-components/sketch-tool-calls.ts b/webui/src/web-components/sketch-tool-calls.ts
index f83e901..2d25953 100644
--- a/webui/src/web-components/sketch-tool-calls.ts
+++ b/webui/src/web-components/sketch-tool-calls.ts
@@ -7,13 +7,7 @@
import "./sketch-tool-card-take-screenshot";
import "./sketch-tool-card-about-sketch";
import "./sketch-tool-card-browser-navigate";
-import "./sketch-tool-card-browser-click";
-import "./sketch-tool-card-browser-type";
-import "./sketch-tool-card-browser-wait-for";
-import "./sketch-tool-card-browser-get-text";
import "./sketch-tool-card-browser-eval";
-import "./sketch-tool-card-browser-scroll-into-view";
-import "./sketch-tool-card-browser-resize";
import "./sketch-tool-card-read-image";
import "./sketch-tool-card-browser-recent-console-logs";
import "./sketch-tool-card-browser-clear-console-logs";
@@ -93,41 +87,11 @@
.open=${open}
.toolCall=${toolCall}
></sketch-tool-card-keyword-search>`;
- case "browser_click":
- return html`<sketch-tool-card-browser-click
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-click>`;
- case "browser_type":
- return html`<sketch-tool-card-browser-type
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-type>`;
- case "browser_wait_for":
- return html`<sketch-tool-card-browser-wait-for
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-wait-for>`;
- case "browser_get_text":
- return html`<sketch-tool-card-browser-get-text
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-get-text>`;
case "browser_eval":
return html`<sketch-tool-card-browser-eval
.open=${open}
.toolCall=${toolCall}
></sketch-tool-card-browser-eval>`;
- case "browser_scroll_into_view":
- return html`<sketch-tool-card-browser-scroll-into-view
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-scroll-into-view>`;
- case "browser_resize":
- return html`<sketch-tool-card-browser-resize
- .open=${open}
- .toolCall=${toolCall}
- ></sketch-tool-card-browser-resize>`;
case "read_image":
return html`<sketch-tool-card-read-image
.open=${open}
diff --git a/webui/src/web-components/sketch-tool-card-browser-resize.ts b/webui/src/web-components/sketch-tool-card-browser-resize.ts
deleted file mode 100644
index f76f792..0000000
--- a/webui/src/web-components/sketch-tool-card-browser-resize.ts
+++ /dev/null
@@ -1,66 +0,0 @@
-import { html } from "lit";
-import { customElement, property } from "lit/decorators.js";
-import { ToolCall } from "../types";
-import { SketchTailwindElement } from "./sketch-tailwind-element";
-import "./sketch-tool-card-base";
-
-@customElement("sketch-tool-card-browser-resize")
-export class SketchToolCardBrowserResize extends SketchTailwindElement {
- @property()
- toolCall: ToolCall;
-
- @property()
- open: boolean;
-
- render() {
- // Parse the input to get width and height
- let width = "";
- let height = "";
- try {
- if (this.toolCall?.input) {
- const input = JSON.parse(this.toolCall.input);
- width = input.width ? input.width.toString() : "";
- height = input.height ? input.height.toString() : "";
- }
- } catch (e) {
- console.error("Error parsing resize input:", e);
- }
-
- const summaryContent = html`<span
- class="font-mono text-gray-700 dark:text-gray-300 break-all"
- >
- 🖼️ ${width}x${height}
- </span>`;
- const inputContent = html`<div>
- Resize to:
- <span
- class="font-mono bg-black/[0.05] dark:bg-white/[0.1] px-2 py-1 rounded inline-block break-all"
- >${width}x${height}</span
- >
- </div>`;
- const resultContent = this.toolCall?.result_message?.tool_result
- ? html`<pre
- class="bg-gray-200 dark:bg-gray-700 text-black dark:text-gray-100 p-2 rounded whitespace-pre-wrap break-words max-w-full w-full box-border"
- >
-${this.toolCall.result_message.tool_result}</pre
- >`
- : "";
-
- return html`
- <sketch-tool-card-base
- .open=${this.open}
- .toolCall=${this.toolCall}
- .summaryContent=${summaryContent}
- .inputContent=${inputContent}
- .resultContent=${resultContent}
- >
- </sketch-tool-card-base>
- `;
- }
-}
-
-declare global {
- interface HTMLElementTagNameMap {
- "sketch-tool-card-browser-resize": SketchToolCardBrowserResize;
- }
-}
diff --git a/webui/src/web-components/sketch-tool-card-browser-wait-for.ts b/webui/src/web-components/sketch-tool-card-browser-wait-for.ts
deleted file mode 100644
index fb651f0..0000000
--- a/webui/src/web-components/sketch-tool-card-browser-wait-for.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { html } from "lit";
-import { customElement, property } from "lit/decorators.js";
-import { ToolCall } from "../types";
-import { SketchTailwindElement } from "./sketch-tailwind-element";
-import "./sketch-tool-card-base";
-
-@customElement("sketch-tool-card-browser-wait-for")
-export class SketchToolCardBrowserWaitFor extends SketchTailwindElement {
- @property()
- toolCall: ToolCall;
-
- @property()
- open: boolean;
-
- render() {
- // Parse the input to get selector
- let selector = "";
- try {
- if (this.toolCall?.input) {
- const input = JSON.parse(this.toolCall.input);
- selector = input.selector || "";
- }
- } catch (e) {
- console.error("Error parsing wait for input:", e);
- }
-
- const summaryContent = html`<span
- class="font-mono text-gray-700 dark:text-gray-300 break-all"
- >
- ⏳ ${selector}
- </span>`;
- const inputContent = html`<div>
- Wait for:
- <span
- class="font-mono bg-black/[0.05] dark:bg-white/[0.1] px-2 py-1 rounded inline-block break-all"
- >${selector}</span
- >
- </div>`;
- const resultContent = this.toolCall?.result_message?.tool_result
- ? html`<pre
- class="bg-gray-200 dark:bg-gray-700 text-black dark:text-gray-100 p-2 rounded whitespace-pre-wrap break-words max-w-full w-full box-border"
- >
-${this.toolCall.result_message.tool_result}</pre
- >`
- : "";
-
- return html`
- <sketch-tool-card-base
- .open=${this.open}
- .toolCall=${this.toolCall}
- .summaryContent=${summaryContent}
- .inputContent=${inputContent}
- .resultContent=${resultContent}
- >
- </sketch-tool-card-base>
- `;
- }
-}
-
-declare global {
- interface HTMLElementTagNameMap {
- "sketch-tool-card-browser-wait-for": SketchToolCardBrowserWaitFor;
- }
-}