blob: 577a5a9085997c9d578fc29999319f4e220d2b37 [file] [log] [blame]
Philip Zeyliger33d282f2025-05-03 04:01:54 +00001// Package browse provides browser automation tools for the agent
2package browse
3
4import (
5 "context"
Philip Zeyliger72252cb2025-05-10 17:00:08 -07006 "encoding/base64"
Philip Zeyliger33d282f2025-05-03 04:01:54 +00007 "encoding/json"
8 "fmt"
9 "log"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070010 "net/http"
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +000011 "net/url"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000012 "os"
13 "path/filepath"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070014 "strings"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000015 "sync"
16 "time"
17
Philip Zeyliger18e33682025-05-13 16:34:21 -070018 "github.com/chromedp/cdproto/runtime"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000019 "github.com/chromedp/chromedp"
20 "github.com/google/uuid"
21 "sketch.dev/llm"
22)
23
// ScreenshotDir is the directory on disk into which screenshots are written.
// Each screenshot is stored there as "<id>.png".
const ScreenshotDir = "/tmp/sketch-screenshots"
26
27// BrowseTools contains all browser tools and manages a shared browser instance
28type BrowseTools struct {
29 ctx context.Context
30 cancel context.CancelFunc
31 browserCtx context.Context
32 browserCtxCancel context.CancelFunc
33 mux sync.Mutex
34 initOnce sync.Once
35 initialized bool
36 initErr error
37 // Map to track screenshots by ID and their creation time
38 screenshots map[string]time.Time
39 screenshotsMutex sync.Mutex
Philip Zeyliger18e33682025-05-13 16:34:21 -070040 // Console logs storage
41 consoleLogs []*runtime.EventConsoleAPICalled
42 consoleLogsMutex sync.Mutex
43 maxConsoleLogs int
Philip Zeyliger33d282f2025-05-03 04:01:54 +000044}
45
46// NewBrowseTools creates a new set of browser automation tools
47func NewBrowseTools(ctx context.Context) *BrowseTools {
48 ctx, cancel := context.WithCancel(ctx)
49
50 // Ensure the screenshot directory exists
Autoformatter4962f152025-05-06 17:24:20 +000051 if err := os.MkdirAll(ScreenshotDir, 0o755); err != nil {
Philip Zeyliger33d282f2025-05-03 04:01:54 +000052 log.Printf("Failed to create screenshot directory: %v", err)
53 }
54
55 b := &BrowseTools{
Philip Zeyliger18e33682025-05-13 16:34:21 -070056 ctx: ctx,
57 cancel: cancel,
58 screenshots: make(map[string]time.Time),
59 consoleLogs: make([]*runtime.EventConsoleAPICalled, 0),
60 maxConsoleLogs: 100,
Philip Zeyliger33d282f2025-05-03 04:01:54 +000061 }
62
63 return b
64}
65
66// Initialize starts the browser if it's not already running
67func (b *BrowseTools) Initialize() error {
68 b.mux.Lock()
69 defer b.mux.Unlock()
70
71 b.initOnce.Do(func() {
72 // ChromeDP.ExecPath has a list of common places to find Chrome...
73 opts := chromedp.DefaultExecAllocatorOptions[:]
Philip Zeyligerc0131342025-06-13 21:07:08 -070074 // This is the default when running as root, but we generally need it
75 // when running in a container, even when we aren't root (which is largely
76 // the case for tests).
77 opts = append(opts, chromedp.NoSandbox)
Philip Zeyligera35de5f2025-06-14 12:00:48 -070078 // Setting 'DBUS_SESSION_BUS_ADDRESS=""' or this flag allows tests to pass
79 // in GitHub runner contexts. It's a mystery why the failure isn't clear when this fails.
80 opts = append(opts, chromedp.Flag("--disable-dbus", true))
81 // This can be pretty slow in tests
Philip Zeyligerfe51d1d2025-06-16 21:19:44 -070082 opts = append(opts, chromedp.WSURLReadTimeout(60*time.Second))
Philip Zeyliger9b39aa62025-07-14 11:56:02 -070083 // Add environment variable to mark this as a sketch internal process
84 opts = append(opts, chromedp.Env("SKETCH_IGNORE_PORTS=1"))
Philip Zeyliger33d282f2025-05-03 04:01:54 +000085 allocCtx, _ := chromedp.NewExecAllocator(b.ctx, opts...)
86 browserCtx, browserCancel := chromedp.NewContext(
87 allocCtx,
Philip Zeyligerfe51d1d2025-06-16 21:19:44 -070088 chromedp.WithLogf(log.Printf), chromedp.WithErrorf(log.Printf), chromedp.WithBrowserOption(chromedp.WithDialTimeout(60*time.Second)),
Philip Zeyliger33d282f2025-05-03 04:01:54 +000089 )
90
91 b.browserCtx = browserCtx
92 b.browserCtxCancel = browserCancel
93
Philip Zeyliger18e33682025-05-13 16:34:21 -070094 // Set up console log listener
95 chromedp.ListenTarget(browserCtx, func(ev any) {
96 switch e := ev.(type) {
97 case *runtime.EventConsoleAPICalled:
98 b.captureConsoleLog(e)
99 }
100 })
101
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000102 // Ensure the browser starts
103 if err := chromedp.Run(browserCtx); err != nil {
104 b.initErr = fmt.Errorf("failed to start browser (please apt get chromium or equivalent): %w", err)
105 return
106 }
Josh Bleecher Snyder7fbc8e42025-05-29 19:42:25 +0000107
108 // Set default viewport size to 1280x720 (16:9 widescreen)
109 if err := chromedp.Run(browserCtx, chromedp.EmulateViewport(1280, 720)); err != nil {
110 b.initErr = fmt.Errorf("failed to set default viewport: %w", err)
111 return
112 }
113
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000114 b.initialized = true
115 })
116
117 return b.initErr
118}
119
120// Close shuts down the browser
121func (b *BrowseTools) Close() {
122 b.mux.Lock()
123 defer b.mux.Unlock()
124
125 if b.browserCtxCancel != nil {
126 b.browserCtxCancel()
127 b.browserCtxCancel = nil
128 }
129
130 if b.cancel != nil {
131 b.cancel()
132 }
133
134 b.initialized = false
135 log.Println("Browser closed")
136}
137
138// GetBrowserContext returns the context for browser operations
139func (b *BrowseTools) GetBrowserContext() (context.Context, error) {
140 if err := b.Initialize(); err != nil {
141 return nil, err
142 }
143 return b.browserCtx, nil
144}
145
// navigateInput is the JSON payload accepted by the browser_navigate tool:
// the target URL plus an optional timeout given as a Go duration string.
type navigateInput struct {
	URL     string `json:"url"`
	Timeout string `json:"timeout,omitempty"`
}
151
// isPort80 reports whether urlStr definitely uses port 80.
//
// It returns true when the URL names port 80 explicitly, including spellings
// with leading zeros such as ":080" that url.Parse accepts, or when it is an
// http URL with no port at all. URLs that fail to parse yield false.
func isPort80(urlStr string) bool {
	parsedURL, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	port := parsedURL.Port()
	if port == "" {
		// No explicit port: only plain http defaults to 80.
		return parsedURL.Scheme == "http"
	}
	// Compare without leading zeros so that "080" or "0080" cannot be used
	// to slip past the check.
	return strings.TrimLeft(port, "0") == "80"
}
161
162// NewNavigateTool creates a tool for navigating to URLs
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000163func (b *BrowseTools) NewNavigateTool() *llm.Tool {
164 return &llm.Tool{
165 Name: "browser_navigate",
166 Description: "Navigate the browser to a specific URL and wait for page to load",
167 InputSchema: json.RawMessage(`{
168 "type": "object",
169 "properties": {
170 "url": {
171 "type": "string",
172 "description": "The URL to navigate to"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700173 },
174 "timeout": {
175 "type": "string",
176 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000177 }
178 },
179 "required": ["url"]
180 }`),
181 Run: b.navigateRun,
182 }
183}
184
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700185func (b *BrowseTools) navigateRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000186 var input navigateInput
187 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700188 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000189 }
190
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +0000191 if isPort80(input.URL) {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700192 return llm.ErrorToolOut(fmt.Errorf("port 80 is not the port you're looking for--port 80 is the main sketch server"))
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +0000193 }
194
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000195 browserCtx, err := b.GetBrowserContext()
196 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700197 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000198 }
199
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700200 // Create a timeout context for this operation
201 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
202 defer cancel()
203
204 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000205 chromedp.Navigate(input.URL),
206 chromedp.WaitReady("body"),
207 )
208 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700209 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000210 }
211
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700212 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000213}
214
// clickInput is the JSON payload accepted by the browser_click tool: the CSS
// selector to click, whether to wait for visibility first, and an optional
// timeout given as a Go duration string.
type clickInput struct {
	Selector    string `json:"selector"`
	WaitVisible bool   `json:"wait_visible,omitempty"`
	Timeout     string `json:"timeout,omitempty"`
}
221
222// NewClickTool creates a tool for clicking elements
223func (b *BrowseTools) NewClickTool() *llm.Tool {
224 return &llm.Tool{
225 Name: "browser_click",
226 Description: "Click the first element matching a CSS selector",
227 InputSchema: json.RawMessage(`{
228 "type": "object",
229 "properties": {
230 "selector": {
231 "type": "string",
232 "description": "CSS selector for the element to click"
233 },
234 "wait_visible": {
235 "type": "boolean",
236 "description": "Wait for the element to be visible before clicking"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700237 },
238 "timeout": {
239 "type": "string",
240 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000241 }
242 },
243 "required": ["selector"]
244 }`),
245 Run: b.clickRun,
246 }
247}
248
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700249func (b *BrowseTools) clickRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000250 var input clickInput
251 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700252 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000253 }
254
255 browserCtx, err := b.GetBrowserContext()
256 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700257 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000258 }
259
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700260 // Create a timeout context for this operation
261 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
262 defer cancel()
263
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000264 actions := []chromedp.Action{
265 chromedp.WaitReady(input.Selector),
266 }
267
268 if input.WaitVisible {
269 actions = append(actions, chromedp.WaitVisible(input.Selector))
270 }
271
272 actions = append(actions, chromedp.Click(input.Selector))
273
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700274 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000275 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700276 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000277 }
278
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700279 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000280}
281
// typeInput is the JSON payload accepted by the browser_type tool: the CSS
// selector of the input element, the text to type, whether to clear the field
// first, and an optional timeout given as a Go duration string.
type typeInput struct {
	Selector string `json:"selector"`
	Text     string `json:"text"`
	Clear    bool   `json:"clear,omitempty"`
	Timeout  string `json:"timeout,omitempty"`
}
289
290// NewTypeTool creates a tool for typing into input elements
291func (b *BrowseTools) NewTypeTool() *llm.Tool {
292 return &llm.Tool{
293 Name: "browser_type",
294 Description: "Type text into an input or textarea element",
295 InputSchema: json.RawMessage(`{
296 "type": "object",
297 "properties": {
298 "selector": {
299 "type": "string",
300 "description": "CSS selector for the input element"
301 },
302 "text": {
303 "type": "string",
304 "description": "Text to type into the element"
305 },
306 "clear": {
307 "type": "boolean",
308 "description": "Clear the input field before typing"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700309 },
310 "timeout": {
311 "type": "string",
312 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000313 }
314 },
315 "required": ["selector", "text"]
316 }`),
317 Run: b.typeRun,
318 }
319}
320
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700321func (b *BrowseTools) typeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000322 var input typeInput
323 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700324 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000325 }
326
327 browserCtx, err := b.GetBrowserContext()
328 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700329 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000330 }
331
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700332 // Create a timeout context for this operation
333 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
334 defer cancel()
335
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000336 actions := []chromedp.Action{
337 chromedp.WaitReady(input.Selector),
338 chromedp.WaitVisible(input.Selector),
339 }
340
341 if input.Clear {
342 actions = append(actions, chromedp.Clear(input.Selector))
343 }
344
345 actions = append(actions, chromedp.SendKeys(input.Selector, input.Text))
346
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700347 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000348 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700349 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000350 }
351
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700352 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000353}
354
// waitForInput is the JSON payload accepted by the browser_wait_for tool: the
// CSS selector to wait for and an optional timeout given as a Go duration
// string.
type waitForInput struct {
	Selector string `json:"selector"`
	Timeout  string `json:"timeout,omitempty"`
}
360
361// NewWaitForTool creates a tool for waiting for elements
362func (b *BrowseTools) NewWaitForTool() *llm.Tool {
363 return &llm.Tool{
364 Name: "browser_wait_for",
365 Description: "Wait for an element to be present in the DOM",
366 InputSchema: json.RawMessage(`{
367 "type": "object",
368 "properties": {
369 "selector": {
370 "type": "string",
371 "description": "CSS selector for the element to wait for"
372 },
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700373 "timeout": {
374 "type": "string",
375 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000376 }
377 },
378 "required": ["selector"]
379 }`),
380 Run: b.waitForRun,
381 }
382}
383
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700384func (b *BrowseTools) waitForRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000385 var input waitForInput
386 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700387 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000388 }
389
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000390 browserCtx, err := b.GetBrowserContext()
391 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700392 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000393 }
394
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700395 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000396 defer cancel()
397
398 err = chromedp.Run(timeoutCtx, chromedp.WaitReady(input.Selector))
399 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700400 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000401 }
402
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700403 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000404}
405
// getTextInput is the JSON payload accepted by the browser_get_text tool: the
// CSS selector to read text from and an optional timeout given as a Go
// duration string.
type getTextInput struct {
	Selector string `json:"selector"`
	Timeout  string `json:"timeout,omitempty"`
}
411
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000412// NewGetTextTool creates a tool for getting text from elements
413func (b *BrowseTools) NewGetTextTool() *llm.Tool {
414 return &llm.Tool{
415 Name: "browser_get_text",
Josh Bleecher Snydercb557262025-06-30 23:55:20 +0000416 Description: "Get the innerText of an element, returned in innerText tag. Can be used to read the web page.",
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000417 InputSchema: json.RawMessage(`{
418 "type": "object",
419 "properties": {
420 "selector": {
421 "type": "string",
422 "description": "CSS selector for the element to get text from"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700423 },
424 "timeout": {
425 "type": "string",
426 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000427 }
428 },
429 "required": ["selector"]
430 }`),
431 Run: b.getTextRun,
432 }
433}
434
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700435func (b *BrowseTools) getTextRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000436 var input getTextInput
437 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700438 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000439 }
440
441 browserCtx, err := b.GetBrowserContext()
442 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700443 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000444 }
445
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700446 // Create a timeout context for this operation
447 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
448 defer cancel()
449
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000450 var text string
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700451 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000452 chromedp.WaitReady(input.Selector),
453 chromedp.Text(input.Selector, &text),
454 )
455 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700456 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000457 }
458
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700459 return llm.ToolOut{LLMContent: llm.TextContent("<innerText>" + text + "</innerText>")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000460}
461
// evalInput is the JSON payload accepted by the browser_eval tool: the
// JavaScript expression to evaluate and an optional timeout given as a Go
// duration string.
type evalInput struct {
	Expression string `json:"expression"`
	Timeout    string `json:"timeout,omitempty"`
}
467
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000468// NewEvalTool creates a tool for evaluating JavaScript
469func (b *BrowseTools) NewEvalTool() *llm.Tool {
470 return &llm.Tool{
471 Name: "browser_eval",
472 Description: "Evaluate JavaScript in the browser context",
473 InputSchema: json.RawMessage(`{
474 "type": "object",
475 "properties": {
476 "expression": {
477 "type": "string",
478 "description": "JavaScript expression to evaluate"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700479 },
480 "timeout": {
481 "type": "string",
482 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000483 }
484 },
485 "required": ["expression"]
486 }`),
487 Run: b.evalRun,
488 }
489}
490
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700491func (b *BrowseTools) evalRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000492 var input evalInput
493 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700494 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000495 }
496
497 browserCtx, err := b.GetBrowserContext()
498 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700499 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000500 }
501
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700502 // Create a timeout context for this operation
503 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
504 defer cancel()
505
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000506 var result any
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700507 err = chromedp.Run(timeoutCtx, chromedp.Evaluate(input.Expression, &result))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000508 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700509 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000510 }
511
Josh Bleecher Snydercb557262025-06-30 23:55:20 +0000512 // Return the result as JSON
513 response, err := json.Marshal(result)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000514 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700515 return llm.ErrorfToolOut("failed to marshal response: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000516 }
517
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700518 return llm.ToolOut{LLMContent: llm.TextContent("<javascript_result>" + string(response) + "</javascript_result>")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000519}
520
// screenshotInput is the JSON payload accepted by the browser_take_screenshot
// tool: an optional CSS selector restricting the capture to one element, and
// an optional timeout given as a Go duration string.
type screenshotInput struct {
	Selector string `json:"selector,omitempty"`
	Timeout  string `json:"timeout,omitempty"`
}
526
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000527// NewScreenshotTool creates a tool for taking screenshots
528func (b *BrowseTools) NewScreenshotTool() *llm.Tool {
529 return &llm.Tool{
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700530 Name: "browser_take_screenshot",
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000531 Description: "Take a screenshot of the page or a specific element",
532 InputSchema: json.RawMessage(`{
533 "type": "object",
534 "properties": {
535 "selector": {
536 "type": "string",
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -0700537 "description": "CSS selector for the element to screenshot (optional)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000538 },
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700539 "timeout": {
540 "type": "string",
541 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000542 }
543 }
544 }`),
545 Run: b.screenshotRun,
546 }
547}
548
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700549func (b *BrowseTools) screenshotRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000550 var input screenshotInput
551 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700552 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000553 }
554
555 browserCtx, err := b.GetBrowserContext()
556 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700557 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000558 }
559
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700560 // Create a timeout context for this operation
561 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
562 defer cancel()
563
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000564 var buf []byte
565 var actions []chromedp.Action
566
567 if input.Selector != "" {
568 // Take screenshot of specific element
569 actions = append(actions,
570 chromedp.WaitReady(input.Selector),
571 chromedp.Screenshot(input.Selector, &buf, chromedp.NodeVisible),
572 )
573 } else {
574 // Take full page screenshot
575 actions = append(actions, chromedp.CaptureScreenshot(&buf))
576 }
577
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700578 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000579 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700580 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000581 }
582
Philip Zeyliger542bda32025-06-11 18:31:03 -0700583 // Save the screenshot and get its ID for potential future reference
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000584 id := b.SaveScreenshot(buf)
585 if id == "" {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700586 return llm.ErrorToolOut(fmt.Errorf("failed to save screenshot"))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000587 }
588
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700589 // Get the full path to the screenshot
590 screenshotPath := GetScreenshotPath(id)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000591
Philip Zeyliger542bda32025-06-11 18:31:03 -0700592 // Encode the image as base64
593 base64Data := base64.StdEncoding.EncodeToString(buf)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700594
Philip Zeyliger542bda32025-06-11 18:31:03 -0700595 // Return the screenshot directly to the LLM
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700596 return llm.ToolOut{LLMContent: []llm.Content{
Philip Zeyliger542bda32025-06-11 18:31:03 -0700597 {
598 Type: llm.ContentTypeText,
599 Text: fmt.Sprintf("Screenshot taken (saved as %s)", screenshotPath),
600 },
601 {
602 Type: llm.ContentTypeText, // Will be mapped to image in content array
603 MediaType: "image/png",
604 Data: base64Data,
605 },
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700606 }}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000607}
608
// scrollIntoViewInput is the JSON payload accepted by the
// browser_scroll_into_view tool: the CSS selector to scroll to and an optional
// timeout given as a Go duration string.
type scrollIntoViewInput struct {
	Selector string `json:"selector"`
	Timeout  string `json:"timeout,omitempty"`
}
614
615// NewScrollIntoViewTool creates a tool for scrolling elements into view
616func (b *BrowseTools) NewScrollIntoViewTool() *llm.Tool {
617 return &llm.Tool{
618 Name: "browser_scroll_into_view",
619 Description: "Scroll an element into view if it's not visible",
620 InputSchema: json.RawMessage(`{
621 "type": "object",
622 "properties": {
623 "selector": {
624 "type": "string",
625 "description": "CSS selector for the element to scroll into view"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700626 },
627 "timeout": {
628 "type": "string",
629 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000630 }
631 },
632 "required": ["selector"]
633 }`),
634 Run: b.scrollIntoViewRun,
635 }
636}
637
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700638func (b *BrowseTools) scrollIntoViewRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000639 var input scrollIntoViewInput
640 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700641 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000642 }
643
644 browserCtx, err := b.GetBrowserContext()
645 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700646 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000647 }
648
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700649 // Create a timeout context for this operation
650 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
651 defer cancel()
652
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000653 script := fmt.Sprintf(`
654 const el = document.querySelector('%s');
655 if (el) {
656 el.scrollIntoView({behavior: 'smooth', block: 'center'});
657 return true;
658 }
659 return false;
660 `, input.Selector)
661
662 var result bool
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700663 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000664 chromedp.WaitReady(input.Selector),
665 chromedp.Evaluate(script, &result),
666 )
667 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700668 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000669 }
670
671 if !result {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700672 return llm.ErrorToolOut(fmt.Errorf("element not found: %s", input.Selector))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000673 }
674
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700675 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000676}
677
// resizeInput is the JSON payload accepted by the browser_resize tool: the
// requested width and height in pixels and an optional timeout given as a Go
// duration string.
type resizeInput struct {
	Width   int    `json:"width"`
	Height  int    `json:"height"`
	Timeout string `json:"timeout,omitempty"`
}
684
685// NewResizeTool creates a tool for resizing the browser window
686func (b *BrowseTools) NewResizeTool() *llm.Tool {
687 return &llm.Tool{
688 Name: "browser_resize",
689 Description: "Resize the browser window to a specific width and height",
690 InputSchema: json.RawMessage(`{
691 "type": "object",
692 "properties": {
693 "width": {
694 "type": "integer",
695 "description": "Window width in pixels"
696 },
697 "height": {
698 "type": "integer",
699 "description": "Window height in pixels"
700 },
701 "timeout": {
702 "type": "string",
703 "description": "Timeout as a Go duration string (default: 5s)"
704 }
705 },
706 "required": ["width", "height"]
707 }`),
708 Run: b.resizeRun,
709 }
710}
711
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700712func (b *BrowseTools) resizeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger05224842025-05-10 18:26:08 -0700713 var input resizeInput
714 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700715 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700716 }
717
718 browserCtx, err := b.GetBrowserContext()
719 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700720 return llm.ErrorToolOut(err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700721 }
722
723 // Create a timeout context for this operation
724 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
725 defer cancel()
726
727 // Validate dimensions
728 if input.Width <= 0 || input.Height <= 0 {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700729 return llm.ErrorToolOut(fmt.Errorf("invalid dimensions: width and height must be positive"))
Philip Zeyliger05224842025-05-10 18:26:08 -0700730 }
731
732 // Resize the browser window
733 err = chromedp.Run(timeoutCtx,
734 chromedp.EmulateViewport(int64(input.Width), int64(input.Height)),
735 )
736 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700737 return llm.ErrorToolOut(err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700738 }
739
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700740 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger05224842025-05-10 18:26:08 -0700741}
742
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700743// GetTools returns browser tools, optionally filtering out screenshot-related tools
744func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
745 tools := []*llm.Tool{
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000746 b.NewNavigateTool(),
747 b.NewClickTool(),
748 b.NewTypeTool(),
749 b.NewWaitForTool(),
750 b.NewGetTextTool(),
751 b.NewEvalTool(),
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000752 b.NewScrollIntoViewTool(),
Philip Zeyliger05224842025-05-10 18:26:08 -0700753 b.NewResizeTool(),
Philip Zeyliger18e33682025-05-13 16:34:21 -0700754 b.NewRecentConsoleLogsTool(),
755 b.NewClearConsoleLogsTool(),
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000756 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700757
758 // Add screenshot-related tools if supported
759 if includeScreenshotTools {
760 tools = append(tools, b.NewScreenshotTool())
761 tools = append(tools, b.NewReadImageTool())
762 }
763
764 return tools
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000765}
766
767// SaveScreenshot saves a screenshot to disk and returns its ID
768func (b *BrowseTools) SaveScreenshot(data []byte) string {
769 // Generate a unique ID
770 id := uuid.New().String()
771
772 // Save the file
773 filePath := filepath.Join(ScreenshotDir, id+".png")
Autoformatter4962f152025-05-06 17:24:20 +0000774 if err := os.WriteFile(filePath, data, 0o644); err != nil {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000775 log.Printf("Failed to save screenshot: %v", err)
776 return ""
777 }
778
779 // Track this screenshot
780 b.screenshotsMutex.Lock()
781 b.screenshots[id] = time.Now()
782 b.screenshotsMutex.Unlock()
783
784 return id
785}
786
787// GetScreenshotPath returns the full path to a screenshot by ID
788func GetScreenshotPath(id string) string {
789 return filepath.Join(ScreenshotDir, id+".png")
790}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700791
// readImageInput is the JSON payload accepted by the read_image tool.
type readImageInput struct {
	// Path is the location of the image file to read.
	Path string `json:"path"`
	// Timeout is an optional Go duration string. It is decoded from the
	// request, but readImageRun does not consult it.
	Timeout string `json:"timeout,omitempty"`
}
797
798// NewReadImageTool creates a tool for reading images and returning them as base64 encoded data
799func (b *BrowseTools) NewReadImageTool() *llm.Tool {
800 return &llm.Tool{
Philip Zeyliger542bda32025-06-11 18:31:03 -0700801 Name: "read_image",
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700802 Description: "Read an image file (such as a screenshot) and encode it for sending to the LLM",
803 InputSchema: json.RawMessage(`{
804 "type": "object",
805 "properties": {
806 "path": {
807 "type": "string",
808 "description": "Path to the image file to read"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700809 },
810 "timeout": {
811 "type": "string",
812 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700813 }
814 },
815 "required": ["path"]
816 }`),
817 Run: b.readImageRun,
818 }
819}
820
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700821func (b *BrowseTools) readImageRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700822 var input readImageInput
823 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700824 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700825 }
826
827 // Check if the path exists
828 if _, err := os.Stat(input.Path); os.IsNotExist(err) {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700829 return llm.ErrorfToolOut("image file not found: %s", input.Path)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700830 }
831
832 // Read the file
833 imageData, err := os.ReadFile(input.Path)
834 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700835 return llm.ErrorfToolOut("failed to read image file: %w", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700836 }
837
838 // Detect the image type
839 imageType := http.DetectContentType(imageData)
840 if !strings.HasPrefix(imageType, "image/") {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700841 return llm.ErrorfToolOut("file is not an image: %s", imageType)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700842 }
843
844 // Encode the image as base64
845 base64Data := base64.StdEncoding.EncodeToString(imageData)
846
847 // Create a Content object that includes both text and the image
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700848 return llm.ToolOut{LLMContent: []llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700849 {
850 Type: llm.ContentTypeText,
851 Text: fmt.Sprintf("Image from %s (type: %s)", input.Path, imageType),
852 },
853 {
854 Type: llm.ContentTypeText, // Will be mapped to image in content array
855 MediaType: imageType,
856 Data: base64Data,
857 },
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700858 }}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700859}
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700860
// parseTimeout converts a Go duration string (for example "500ms" or "10s")
// into a time.Duration.
//
// It returns a default of 5 seconds when timeout is empty, cannot be parsed,
// or is not strictly positive. Zero and negative durations are treated as
// invalid because a context created with such a timeout is already expired,
// so every operation using it would fail immediately.
func parseTimeout(timeout string) time.Duration {
	const defaultTimeout = 5 * time.Second

	// time.ParseDuration rejects the empty string, so the "not provided"
	// case falls through the same error path as malformed input.
	dur, err := time.ParseDuration(timeout)
	if err != nil || dur <= 0 {
		return defaultTimeout
	}

	return dur
}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700876
877// captureConsoleLog captures a console log event and stores it
878func (b *BrowseTools) captureConsoleLog(e *runtime.EventConsoleAPICalled) {
879 // Add to logs with mutex protection
880 b.consoleLogsMutex.Lock()
881 defer b.consoleLogsMutex.Unlock()
882
883 // Add the log and maintain max size
884 b.consoleLogs = append(b.consoleLogs, e)
885 if len(b.consoleLogs) > b.maxConsoleLogs {
886 b.consoleLogs = b.consoleLogs[len(b.consoleLogs)-b.maxConsoleLogs:]
887 }
888}
889
// recentConsoleLogsInput is the JSON payload accepted by the
// browser_recent_console_logs tool.
type recentConsoleLogsInput struct {
	// Limit caps the number of entries returned. Values that are zero or
	// negative make recentConsoleLogsRun fall back to its default of 100.
	Limit int `json:"limit,omitempty"`
}
894
895// NewRecentConsoleLogsTool creates a tool for retrieving recent console logs
896func (b *BrowseTools) NewRecentConsoleLogsTool() *llm.Tool {
897 return &llm.Tool{
898 Name: "browser_recent_console_logs",
899 Description: "Get recent browser console logs",
900 InputSchema: json.RawMessage(`{
901 "type": "object",
902 "properties": {
903 "limit": {
904 "type": "integer",
905 "description": "Maximum number of log entries to return (default: 100)"
906 }
907 }
908 }`),
909 Run: b.recentConsoleLogsRun,
910 }
911}
912
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700913func (b *BrowseTools) recentConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger18e33682025-05-13 16:34:21 -0700914 var input recentConsoleLogsInput
915 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700916 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700917 }
918
919 // Ensure browser is initialized
920 _, err := b.GetBrowserContext()
921 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700922 return llm.ErrorToolOut(err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700923 }
924
925 // Apply limit (default to 100 if not specified)
926 limit := 100
927 if input.Limit > 0 {
928 limit = input.Limit
929 }
930
931 // Get console logs with mutex protection
932 b.consoleLogsMutex.Lock()
933 logs := make([]*runtime.EventConsoleAPICalled, 0, len(b.consoleLogs))
934 start := 0
935 if len(b.consoleLogs) > limit {
936 start = len(b.consoleLogs) - limit
937 }
938 logs = append(logs, b.consoleLogs[start:]...)
939 b.consoleLogsMutex.Unlock()
940
941 // Format the logs as JSON
942 logData, err := json.MarshalIndent(logs, "", " ")
943 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700944 return llm.ErrorfToolOut("failed to serialize logs: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700945 }
946
947 // Format the logs
948 var sb strings.Builder
949 sb.WriteString(fmt.Sprintf("Retrieved %d console log entries:\n\n", len(logs)))
950
951 if len(logs) == 0 {
952 sb.WriteString("No console logs captured.")
953 } else {
954 // Add the JSON data for full details
955 sb.WriteString(string(logData))
956 }
957
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700958 return llm.ToolOut{LLMContent: llm.TextContent(sb.String())}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700959}
960
// clearConsoleLogsInput is the JSON payload accepted by the
// browser_clear_console_logs tool. The tool takes no parameters, so the
// struct has no fields.
type clearConsoleLogsInput struct{}
963
964// NewClearConsoleLogsTool creates a tool for clearing console logs
965func (b *BrowseTools) NewClearConsoleLogsTool() *llm.Tool {
966 return &llm.Tool{
967 Name: "browser_clear_console_logs",
968 Description: "Clear all captured browser console logs",
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -0700969 InputSchema: llm.EmptySchema(),
970 Run: b.clearConsoleLogsRun,
Philip Zeyliger18e33682025-05-13 16:34:21 -0700971 }
972}
973
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700974func (b *BrowseTools) clearConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger18e33682025-05-13 16:34:21 -0700975 var input clearConsoleLogsInput
976 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700977 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700978 }
979
980 // Ensure browser is initialized
981 _, err := b.GetBrowserContext()
982 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700983 return llm.ErrorToolOut(err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700984 }
985
986 // Clear console logs with mutex protection
987 b.consoleLogsMutex.Lock()
988 logCount := len(b.consoleLogs)
989 b.consoleLogs = make([]*runtime.EventConsoleAPICalled, 0)
990 b.consoleLogsMutex.Unlock()
991
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700992 return llm.ToolOut{LLMContent: llm.TextContent(fmt.Sprintf("Cleared %d console log entries.", logCount))}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700993}