browser: Add console log capture tools
This was entirely Sketch-coded.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: sc6bbdde6b1a71d8fk
diff --git a/claudetool/browse/browse.go b/claudetool/browse/browse.go
index 8a72390..40974e3 100644
--- a/claudetool/browse/browse.go
+++ b/claudetool/browse/browse.go
@@ -14,6 +14,7 @@
"sync"
"time"
+ "github.com/chromedp/cdproto/runtime"
"github.com/chromedp/chromedp"
"github.com/google/uuid"
"sketch.dev/llm"
@@ -35,6 +36,10 @@
// Map to track screenshots by ID and their creation time
screenshots map[string]time.Time
screenshotsMutex sync.Mutex
+ // Console logs storage
+ consoleLogs []*runtime.EventConsoleAPICalled
+ consoleLogsMutex sync.Mutex
+ maxConsoleLogs int
}
// NewBrowseTools creates a new set of browser automation tools
@@ -47,9 +52,11 @@
}
b := &BrowseTools{
- ctx: ctx,
- cancel: cancel,
- screenshots: make(map[string]time.Time),
+ ctx: ctx,
+ cancel: cancel,
+ screenshots: make(map[string]time.Time),
+ consoleLogs: make([]*runtime.EventConsoleAPICalled, 0),
+ maxConsoleLogs: 100,
}
return b
@@ -72,6 +79,14 @@
b.browserCtx = browserCtx
b.browserCtxCancel = browserCancel
+ // Set up console log listener
+ chromedp.ListenTarget(browserCtx, func(ev any) {
+ switch e := ev.(type) {
+ case *runtime.EventConsoleAPICalled:
+ b.captureConsoleLog(e)
+ }
+ })
+
// Ensure the browser starts
if err := chromedp.Run(browserCtx); err != nil {
b.initErr = fmt.Errorf("failed to start browser (please apt get chromium or equivalent): %w", err)
@@ -731,6 +746,8 @@
b.NewEvalTool(),
b.NewScrollIntoViewTool(),
b.NewResizeTool(),
+ b.NewRecentConsoleLogsTool(),
+ b.NewClearConsoleLogsTool(),
}
// Add screenshot-related tools if supported
@@ -851,3 +868,124 @@
return dur
}
+
+// captureConsoleLog appends e to the in-memory console log buffer, keeping at
+// most b.maxConsoleLogs of the newest entries. b.consoleLogsMutex is held for
+// the whole update.
+func (b *BrowseTools) captureConsoleLog(e *runtime.EventConsoleAPICalled) {
+	b.consoleLogsMutex.Lock()
+	defer b.consoleLogsMutex.Unlock()
+
+	b.consoleLogs = append(b.consoleLogs, e)
+	excess := len(b.consoleLogs) - b.maxConsoleLogs
+	if excess <= 0 {
+		return
+	}
+
+	// Evict the oldest entries by shifting the survivors to the front and
+	// nil-ing the vacated tail. Merely re-slicing (logs[excess:]) would leave
+	// the evicted events reachable through the backing array until the next
+	// reallocation, and would make that reallocation recur as capacity shrinks.
+	kept := copy(b.consoleLogs, b.consoleLogs[excess:])
+	for i := kept; i < len(b.consoleLogs); i++ {
+		b.consoleLogs[i] = nil
+	}
+	b.consoleLogs = b.consoleLogs[:kept]
+}
+
+// recentConsoleLogsInput is the JSON input decoded by recentConsoleLogsRun.
+type recentConsoleLogsInput struct {
+ Limit int `json:"limit,omitempty"` // cap on entries returned; recentConsoleLogsRun uses 100 when this is <= 0
+}
+
+// NewRecentConsoleLogsTool returns the llm.Tool named "browser_recent_console_logs"; its schema declares an integer "limit" property and its Run handler is b.recentConsoleLogsRun.
+func (b *BrowseTools) NewRecentConsoleLogsTool() *llm.Tool {
+ return &llm.Tool{
+ Name: "browser_recent_console_logs",
+ Description: "Get recent browser console logs",
+ InputSchema: json.RawMessage(`{
+ "type": "object",
+ "properties": {
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of log entries to return (default: 100)"
+ }
+ }
+ }`),
+ Run: b.recentConsoleLogsRun,
+ }
+}
+
+// recentConsoleLogsRun is the Run handler for the browser_recent_console_logs
+// tool.
+//
+// m is the tool's JSON input, decoded into recentConsoleLogsInput. A positive
+// "limit" caps how many of the newest captured entries are returned; a missing
+// or non-positive limit selects 100. Every failure (bad input, browser context
+// unavailable, serialization error) is passed through errorResponse and
+// returned as text content with a nil error. On success the text is a count
+// header followed by the entries as indented JSON, or a fixed notice when
+// nothing has been captured.
+func (b *BrowseTools) recentConsoleLogsRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
+	var input recentConsoleLogsInput
+	if err := json.Unmarshal(m, &input); err != nil {
+		return llm.TextContent(errorResponse(fmt.Errorf("invalid input: %w", err))), nil
+	}
+
+	// Ensure browser is initialized
+	if _, err := b.GetBrowserContext(); err != nil {
+		return llm.TextContent(errorResponse(err)), nil
+	}
+
+	// Apply limit (default to 100 if not specified)
+	limit := 100
+	if input.Limit > 0 {
+		limit = input.Limit
+	}
+
+	// Copy only the requested tail while holding the lock, so the snapshot is
+	// sized to what is returned and serialization runs outside the lock.
+	b.consoleLogsMutex.Lock()
+	start := 0
+	if len(b.consoleLogs) > limit {
+		start = len(b.consoleLogs) - limit
+	}
+	logs := append([]*runtime.EventConsoleAPICalled(nil), b.consoleLogs[start:]...)
+	b.consoleLogsMutex.Unlock()
+
+	// Nothing captured: skip serialization entirely.
+	if len(logs) == 0 {
+		return llm.TextContent("Retrieved 0 console log entries:\n\nNo console logs captured."), nil
+	}
+
+	// Format the logs as JSON for full details
+	logData, err := json.MarshalIndent(logs, "", " ")
+	if err != nil {
+		return llm.TextContent(errorResponse(fmt.Errorf("failed to serialize logs: %w", err))), nil
+	}
+
+	return llm.TextContent(fmt.Sprintf("Retrieved %d console log entries:\n\n%s", len(logs), logData)), nil
+}
+
+// clearConsoleLogsInput is the JSON input decoded by clearConsoleLogsRun; it has no fields.
+type clearConsoleLogsInput struct{}
+
+// NewClearConsoleLogsTool returns the llm.Tool named "browser_clear_console_logs"; its schema declares no properties and its Run handler is b.clearConsoleLogsRun.
+func (b *BrowseTools) NewClearConsoleLogsTool() *llm.Tool {
+ return &llm.Tool{
+ Name: "browser_clear_console_logs",
+ Description: "Clear all captured browser console logs",
+ InputSchema: json.RawMessage(`{
+ "type": "object",
+ "properties": {}
+ }`),
+ Run: b.clearConsoleLogsRun,
+ }
+}
+
+// clearConsoleLogsRun is the Run handler for the browser_clear_console_logs
+// tool. It validates that m decodes as clearConsoleLogsInput, requires that
+// the browser context can be obtained, then replaces the captured log buffer
+// with an empty one and reports how many entries were discarded. Failures are
+// passed through errorResponse and returned as text content with a nil error.
+func (b *BrowseTools) clearConsoleLogsRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
+	// The tool takes no parameters, but malformed JSON is still rejected.
+	var decoded clearConsoleLogsInput
+	if decodeErr := json.Unmarshal(m, &decoded); decodeErr != nil {
+		return llm.TextContent(errorResponse(fmt.Errorf("invalid input: %w", decodeErr))), nil
+	}
+
+	// Ensure browser is initialized
+	if _, ctxErr := b.GetBrowserContext(); ctxErr != nil {
+		return llm.TextContent(errorResponse(ctxErr)), nil
+	}
+
+	// Swap in a fresh empty buffer under the lock and remember the old size.
+	dropped := func() int {
+		b.consoleLogsMutex.Lock()
+		defer b.consoleLogsMutex.Unlock()
+
+		n := len(b.consoleLogs)
+		b.consoleLogs = []*runtime.EventConsoleAPICalled{}
+		return n
+	}()
+
+	summary := fmt.Sprintf("Cleared %d console log entries.", dropped)
+	return llm.TextContent(summary), nil
+}
diff --git a/claudetool/browse/browse_test.go b/claudetool/browse/browse_test.go
index 7cffa0e..d417a06 100644
--- a/claudetool/browse/browse_test.go
+++ b/claudetool/browse/browse_test.go
@@ -73,8 +73,8 @@
// Test with screenshot tools included
t.Run("with screenshots", func(t *testing.T) {
toolsWithScreenshots := tools.GetTools(true)
- if len(toolsWithScreenshots) != 10 {
- t.Errorf("expected 9 tools with screenshots, got %d", len(toolsWithScreenshots))
+ if len(toolsWithScreenshots) != 12 {
+ t.Errorf("expected 12 tools with screenshots, got %d", len(toolsWithScreenshots))
}
// Check tool naming convention
@@ -88,8 +88,8 @@
// Test without screenshot tools
t.Run("without screenshots", func(t *testing.T) {
noScreenshotTools := tools.GetTools(false)
- if len(noScreenshotTools) != 8 {
- t.Errorf("expected 7 tools without screenshots, got %d", len(noScreenshotTools))
+ if len(noScreenshotTools) != 10 {
+ t.Errorf("expected 10 tools without screenshots, got %d", len(noScreenshotTools))
}
})
}
diff --git a/loop/testdata/agent_loop.httprr b/loop/testdata/agent_loop.httprr
index 3d1ad5b..c0bf004 100644
--- a/loop/testdata/agent_loop.httprr
+++ b/loop/testdata/agent_loop.httprr
@@ -1,9 +1,9 @@
httprr trace v1
-16446 2070
+16930 2597
POST https://api.anthropic.com/v1/messages HTTP/1.1
Host: api.anthropic.com
User-Agent: Go-http-client/1.1
-Content-Length: 16248
+Content-Length: 16732
Anthropic-Version: 2023-06-01
Content-Type: application/json
@@ -419,6 +419,27 @@
}
},
{
+ "name": "browser_recent_console_logs",
+ "description": "Get recent browser console logs",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of log entries to return (default: 100)"
+ }
+ }
+ }
+ },
+ {
+ "name": "browser_clear_console_logs",
+ "description": "Clear all captured browser console logs",
+ "input_schema": {
+ "type": "object",
+ "properties": {}
+ }
+ },
+ {
"name": "browser_take_screenshot",
"description": "Take a screenshot of the page or a specific element",
"input_schema": {
@@ -525,24 +546,24 @@
Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
Anthropic-Ratelimit-Input-Tokens-Limit: 200000
Anthropic-Ratelimit-Input-Tokens-Remaining: 199000
-Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-13T02:19:49Z
+Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-13T23:34:09Z
Anthropic-Ratelimit-Output-Tokens-Limit: 80000
Anthropic-Ratelimit-Output-Tokens-Remaining: 80000
-Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-13T02:19:53Z
+Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-13T23:34:14Z
Anthropic-Ratelimit-Requests-Limit: 4000
Anthropic-Ratelimit-Requests-Remaining: 3999
-Anthropic-Ratelimit-Requests-Reset: 2025-05-13T02:19:48Z
+Anthropic-Ratelimit-Requests-Reset: 2025-05-13T23:34:07Z
Anthropic-Ratelimit-Tokens-Limit: 280000
Anthropic-Ratelimit-Tokens-Remaining: 279000
-Anthropic-Ratelimit-Tokens-Reset: 2025-05-13T02:19:49Z
+Anthropic-Ratelimit-Tokens-Reset: 2025-05-13T23:34:09Z
Cf-Cache-Status: DYNAMIC
-Cf-Ray: 93eeac4d2e40250d-SJC
+Cf-Ray: 93f5f6fa7ab517d2-SJC
Content-Type: application/json
-Date: Tue, 13 May 2025 02:19:53 GMT
-Request-Id: req_011CP4jFATfD5sh1tYAhwA8n
+Date: Tue, 13 May 2025 23:34:14 GMT
+Request-Id: req_011CP6QRHtUEsXiGbCcrW7o4
Server: cloudflare
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
Via: 1.1 google
X-Robots-Tag: none
-{"id":"msg_017ZjPLV2KNbvMP9GbZxxEB4","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"text","text":"I have access to the following tools:\n\n1. bash - Executes shell commands\n2. keyword_search - Locates files with search terms\n3. think - For reasoning and note-taking (no external effects)\n4. title - Sets conversation title\n5. precommit - Creates git branches for tracking work\n6. done - Used when the user's goal is achieved\n7. codereview - Runs automated code review\n8. multiplechoice - Presents quick answer options to the user\n9. browser_* tools (navigate, click, type, wait_for, get_text, eval, scroll_into_view, resize, take_screenshot, read_image) - For browser automation\n10. patch - For precise text edits in files\n\nThese tools allow me to help with various coding tasks, file exploration, text editing, git operations, browser automation, and more."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3776,"cache_read_input_tokens":0,"output_tokens":196}}
\ No newline at end of file
+{"id":"msg_012bL4iRbhpsvzdekbT6Jheq","type":"message","role":"assistant","model":"claude-3-7-sonnet-20250219","content":[{"type":"text","text":"I'll list the tools available to me briefly:\n\n1. bash - Execute shell commands\n2. keyword_search - Search for files with keywords\n3. think - Take notes, form plans (no external effects)\n4. title - Set conversation title\n5. precommit - Create a git branch for tracking work\n6. done - Complete a task with checklist verification\n7. codereview - Run automated code review\n8. multiplechoice - Present the user with multiple choice options\n9. browser_navigate - Navigate to a URL\n10. browser_click - Click an element on a webpage\n11. browser_type - Type text into a web form element\n12. browser_wait_for - Wait for an element to appear\n13. browser_get_text - Get text from a webpage element\n14. browser_eval - Run JavaScript in browser context\n15. browser_scroll_into_view - Scroll to make an element visible\n16. browser_resize - Resize browser window\n17. browser_recent_console_logs - Get browser console logs\n18. browser_clear_console_logs - Clear browser console logs\n19. browser_take_screenshot - Take screenshot of webpage\n20. browser_read_image - Read an image file\n21. patch - Make precise text edits to files\n\nThese tools allow me to perform a wide range of actions from running commands, searching code, browsing the web, manipulating files, and managing git workflows."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":4,"cache_creation_input_tokens":3888,"cache_read_input_tokens":0,"output_tokens":318}}
\ No newline at end of file