blob: 99f6ea7cb49efbc1457066b4f1a96fb98aa2f1b2 [file] [log] [blame]
Philip Zeyliger33d282f2025-05-03 04:01:54 +00001// Package browse provides browser automation tools for the agent
2package browse
3
4import (
5 "context"
Philip Zeyliger72252cb2025-05-10 17:00:08 -07006 "encoding/base64"
Philip Zeyliger33d282f2025-05-03 04:01:54 +00007 "encoding/json"
8 "fmt"
9 "log"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070010 "net/http"
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +000011 "net/url"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000012 "os"
13 "path/filepath"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070014 "strings"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000015 "sync"
16 "time"
17
Philip Zeyliger18e33682025-05-13 16:34:21 -070018 "github.com/chromedp/cdproto/runtime"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000019 "github.com/chromedp/chromedp"
20 "github.com/google/uuid"
21 "sketch.dev/llm"
22)
23
const (
	// ScreenshotDir is the directory on disk under which screenshot files
	// are written.
	ScreenshotDir = "/tmp/sketch-screenshots"
)
26
27// BrowseTools contains all browser tools and manages a shared browser instance
28type BrowseTools struct {
29 ctx context.Context
30 cancel context.CancelFunc
31 browserCtx context.Context
32 browserCtxCancel context.CancelFunc
33 mux sync.Mutex
34 initOnce sync.Once
35 initialized bool
36 initErr error
37 // Map to track screenshots by ID and their creation time
38 screenshots map[string]time.Time
39 screenshotsMutex sync.Mutex
Philip Zeyliger18e33682025-05-13 16:34:21 -070040 // Console logs storage
41 consoleLogs []*runtime.EventConsoleAPICalled
42 consoleLogsMutex sync.Mutex
43 maxConsoleLogs int
Philip Zeyliger33d282f2025-05-03 04:01:54 +000044}
45
46// NewBrowseTools creates a new set of browser automation tools
47func NewBrowseTools(ctx context.Context) *BrowseTools {
48 ctx, cancel := context.WithCancel(ctx)
49
50 // Ensure the screenshot directory exists
Autoformatter4962f152025-05-06 17:24:20 +000051 if err := os.MkdirAll(ScreenshotDir, 0o755); err != nil {
Philip Zeyliger33d282f2025-05-03 04:01:54 +000052 log.Printf("Failed to create screenshot directory: %v", err)
53 }
54
55 b := &BrowseTools{
Philip Zeyliger18e33682025-05-13 16:34:21 -070056 ctx: ctx,
57 cancel: cancel,
58 screenshots: make(map[string]time.Time),
59 consoleLogs: make([]*runtime.EventConsoleAPICalled, 0),
60 maxConsoleLogs: 100,
Philip Zeyliger33d282f2025-05-03 04:01:54 +000061 }
62
63 return b
64}
65
66// Initialize starts the browser if it's not already running
67func (b *BrowseTools) Initialize() error {
68 b.mux.Lock()
69 defer b.mux.Unlock()
70
71 b.initOnce.Do(func() {
72 // ChromeDP.ExecPath has a list of common places to find Chrome...
73 opts := chromedp.DefaultExecAllocatorOptions[:]
Philip Zeyligerc0131342025-06-13 21:07:08 -070074 // This is the default when running as root, but we generally need it
75 // when running in a container, even when we aren't root (which is largely
76 // the case for tests).
77 opts = append(opts, chromedp.NoSandbox)
Philip Zeyligera35de5f2025-06-14 12:00:48 -070078 // Setting 'DBUS_SESSION_BUS_ADDRESS=""' or this flag allows tests to pass
79 // in GitHub runner contexts. It's a mystery why the failure isn't clear when this fails.
80 opts = append(opts, chromedp.Flag("--disable-dbus", true))
81 // This can be pretty slow in tests
Philip Zeyligerfe51d1d2025-06-16 21:19:44 -070082 opts = append(opts, chromedp.WSURLReadTimeout(60*time.Second))
Philip Zeyliger9b39aa62025-07-14 11:56:02 -070083 // Add environment variable to mark this as a sketch internal process
84 opts = append(opts, chromedp.Env("SKETCH_IGNORE_PORTS=1"))
Philip Zeyliger33d282f2025-05-03 04:01:54 +000085 allocCtx, _ := chromedp.NewExecAllocator(b.ctx, opts...)
86 browserCtx, browserCancel := chromedp.NewContext(
87 allocCtx,
Philip Zeyligerfe51d1d2025-06-16 21:19:44 -070088 chromedp.WithLogf(log.Printf), chromedp.WithErrorf(log.Printf), chromedp.WithBrowserOption(chromedp.WithDialTimeout(60*time.Second)),
Philip Zeyliger33d282f2025-05-03 04:01:54 +000089 )
90
91 b.browserCtx = browserCtx
92 b.browserCtxCancel = browserCancel
93
Philip Zeyliger18e33682025-05-13 16:34:21 -070094 // Set up console log listener
95 chromedp.ListenTarget(browserCtx, func(ev any) {
96 switch e := ev.(type) {
97 case *runtime.EventConsoleAPICalled:
98 b.captureConsoleLog(e)
99 }
100 })
101
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000102 // Ensure the browser starts
103 if err := chromedp.Run(browserCtx); err != nil {
104 b.initErr = fmt.Errorf("failed to start browser (please apt get chromium or equivalent): %w", err)
105 return
106 }
Josh Bleecher Snyder7fbc8e42025-05-29 19:42:25 +0000107
108 // Set default viewport size to 1280x720 (16:9 widescreen)
109 if err := chromedp.Run(browserCtx, chromedp.EmulateViewport(1280, 720)); err != nil {
110 b.initErr = fmt.Errorf("failed to set default viewport: %w", err)
111 return
112 }
113
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000114 b.initialized = true
115 })
116
117 return b.initErr
118}
119
120// Close shuts down the browser
121func (b *BrowseTools) Close() {
122 b.mux.Lock()
123 defer b.mux.Unlock()
124
125 if b.browserCtxCancel != nil {
126 b.browserCtxCancel()
127 b.browserCtxCancel = nil
128 }
129
130 if b.cancel != nil {
131 b.cancel()
132 }
133
134 b.initialized = false
135 log.Println("Browser closed")
136}
137
138// GetBrowserContext returns the context for browser operations
139func (b *BrowseTools) GetBrowserContext() (context.Context, error) {
140 if err := b.Initialize(); err != nil {
141 return nil, err
142 }
143 return b.browserCtx, nil
144}
145
// navigateInput is the JSON payload accepted by the browser_navigate tool.
type navigateInput struct {
	// URL is the address to navigate to.
	URL string `json:"url"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
151
// isPort80 reports whether urlStr definitely targets port 80: either the
// port is spelled out as 80, or no port is given and the scheme is http.
// A URL that cannot be parsed is reported as false.
func isPort80(urlStr string) bool {
	u, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	switch u.Port() {
	case "80":
		return true
	case "":
		// No explicit port: only plain http implies port 80.
		return u.Scheme == "http"
	default:
		return false
	}
}
161
162// NewNavigateTool creates a tool for navigating to URLs
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000163func (b *BrowseTools) NewNavigateTool() *llm.Tool {
164 return &llm.Tool{
165 Name: "browser_navigate",
166 Description: "Navigate the browser to a specific URL and wait for page to load",
167 InputSchema: json.RawMessage(`{
168 "type": "object",
169 "properties": {
170 "url": {
171 "type": "string",
172 "description": "The URL to navigate to"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700173 },
174 "timeout": {
175 "type": "string",
176 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000177 }
178 },
179 "required": ["url"]
180 }`),
181 Run: b.navigateRun,
182 }
183}
184
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700185func (b *BrowseTools) navigateRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000186 var input navigateInput
187 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700188 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000189 }
190
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +0000191 if isPort80(input.URL) {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700192 return llm.ErrorToolOut(fmt.Errorf("port 80 is not the port you're looking for--port 80 is the main sketch server"))
Josh Bleecher Snyderbf381a72025-05-29 23:45:02 +0000193 }
194
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000195 browserCtx, err := b.GetBrowserContext()
196 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700197 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000198 }
199
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700200 // Create a timeout context for this operation
201 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
202 defer cancel()
203
204 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000205 chromedp.Navigate(input.URL),
206 chromedp.WaitReady("body"),
207 )
208 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700209 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000210 }
211
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700212 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000213}
214
// clickInput is the JSON payload accepted by the browser_click tool.
type clickInput struct {
	// Selector is the CSS selector of the element to click.
	Selector string `json:"selector"`
	// WaitVisible asks for the element to be visible before clicking.
	WaitVisible bool `json:"wait_visible,omitempty"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
221
222// NewClickTool creates a tool for clicking elements
223func (b *BrowseTools) NewClickTool() *llm.Tool {
224 return &llm.Tool{
225 Name: "browser_click",
226 Description: "Click the first element matching a CSS selector",
227 InputSchema: json.RawMessage(`{
228 "type": "object",
229 "properties": {
230 "selector": {
231 "type": "string",
232 "description": "CSS selector for the element to click"
233 },
234 "wait_visible": {
235 "type": "boolean",
236 "description": "Wait for the element to be visible before clicking"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700237 },
238 "timeout": {
239 "type": "string",
240 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000241 }
242 },
243 "required": ["selector"]
244 }`),
245 Run: b.clickRun,
246 }
247}
248
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700249func (b *BrowseTools) clickRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000250 var input clickInput
251 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700252 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000253 }
254
255 browserCtx, err := b.GetBrowserContext()
256 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700257 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000258 }
259
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700260 // Create a timeout context for this operation
261 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
262 defer cancel()
263
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000264 actions := []chromedp.Action{
265 chromedp.WaitReady(input.Selector),
266 }
267
268 if input.WaitVisible {
269 actions = append(actions, chromedp.WaitVisible(input.Selector))
270 }
271
272 actions = append(actions, chromedp.Click(input.Selector))
273
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700274 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000275 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700276 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000277 }
278
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700279 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000280}
281
// typeInput is the JSON payload accepted by the browser_type tool.
type typeInput struct {
	// Selector is the CSS selector of the input element.
	Selector string `json:"selector"`
	// Text is what gets typed into the element.
	Text string `json:"text"`
	// Clear asks for the field to be cleared before typing.
	Clear bool `json:"clear,omitempty"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
289
290// NewTypeTool creates a tool for typing into input elements
291func (b *BrowseTools) NewTypeTool() *llm.Tool {
292 return &llm.Tool{
293 Name: "browser_type",
294 Description: "Type text into an input or textarea element",
295 InputSchema: json.RawMessage(`{
296 "type": "object",
297 "properties": {
298 "selector": {
299 "type": "string",
300 "description": "CSS selector for the input element"
301 },
302 "text": {
303 "type": "string",
304 "description": "Text to type into the element"
305 },
306 "clear": {
307 "type": "boolean",
308 "description": "Clear the input field before typing"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700309 },
310 "timeout": {
311 "type": "string",
312 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000313 }
314 },
315 "required": ["selector", "text"]
316 }`),
317 Run: b.typeRun,
318 }
319}
320
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700321func (b *BrowseTools) typeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000322 var input typeInput
323 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700324 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000325 }
326
327 browserCtx, err := b.GetBrowserContext()
328 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700329 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000330 }
331
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700332 // Create a timeout context for this operation
333 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
334 defer cancel()
335
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000336 actions := []chromedp.Action{
337 chromedp.WaitReady(input.Selector),
338 chromedp.WaitVisible(input.Selector),
339 }
340
341 if input.Clear {
342 actions = append(actions, chromedp.Clear(input.Selector))
343 }
344
345 actions = append(actions, chromedp.SendKeys(input.Selector, input.Text))
346
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700347 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000348 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700349 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000350 }
351
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700352 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000353}
354
// waitForInput is the JSON payload accepted by the browser_wait_for tool.
type waitForInput struct {
	// Selector is the CSS selector of the element to wait for.
	Selector string `json:"selector"`
	// Timeout is an optional Go duration string bounding the wait.
	Timeout string `json:"timeout,omitempty"`
}
360
361// NewWaitForTool creates a tool for waiting for elements
362func (b *BrowseTools) NewWaitForTool() *llm.Tool {
363 return &llm.Tool{
364 Name: "browser_wait_for",
365 Description: "Wait for an element to be present in the DOM",
366 InputSchema: json.RawMessage(`{
367 "type": "object",
368 "properties": {
369 "selector": {
370 "type": "string",
371 "description": "CSS selector for the element to wait for"
372 },
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700373 "timeout": {
374 "type": "string",
375 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000376 }
377 },
378 "required": ["selector"]
379 }`),
380 Run: b.waitForRun,
381 }
382}
383
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700384func (b *BrowseTools) waitForRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000385 var input waitForInput
386 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700387 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000388 }
389
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000390 browserCtx, err := b.GetBrowserContext()
391 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700392 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000393 }
394
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700395 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000396 defer cancel()
397
398 err = chromedp.Run(timeoutCtx, chromedp.WaitReady(input.Selector))
399 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700400 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000401 }
402
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700403 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000404}
405
// getTextInput is the JSON payload accepted by the browser_get_text tool.
type getTextInput struct {
	// Selector is the CSS selector of the element whose text is read.
	Selector string `json:"selector"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
411
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000412// NewGetTextTool creates a tool for getting text from elements
413func (b *BrowseTools) NewGetTextTool() *llm.Tool {
414 return &llm.Tool{
415 Name: "browser_get_text",
Josh Bleecher Snydercb557262025-06-30 23:55:20 +0000416 Description: "Get the innerText of an element, returned in innerText tag. Can be used to read the web page.",
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000417 InputSchema: json.RawMessage(`{
418 "type": "object",
419 "properties": {
420 "selector": {
421 "type": "string",
422 "description": "CSS selector for the element to get text from"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700423 },
424 "timeout": {
425 "type": "string",
426 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000427 }
428 },
429 "required": ["selector"]
430 }`),
431 Run: b.getTextRun,
432 }
433}
434
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700435func (b *BrowseTools) getTextRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000436 var input getTextInput
437 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700438 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000439 }
440
441 browserCtx, err := b.GetBrowserContext()
442 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700443 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000444 }
445
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700446 // Create a timeout context for this operation
447 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
448 defer cancel()
449
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000450 var text string
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700451 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000452 chromedp.WaitReady(input.Selector),
453 chromedp.Text(input.Selector, &text),
454 )
455 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700456 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000457 }
458
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700459 return llm.ToolOut{LLMContent: llm.TextContent("<innerText>" + text + "</innerText>")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000460}
461
// evalInput is the JSON payload accepted by the browser_eval tool.
type evalInput struct {
	// Expression is the JavaScript expression to evaluate.
	Expression string `json:"expression"`
	// Timeout is an optional Go duration string bounding the evaluation.
	Timeout string `json:"timeout,omitempty"`
}
467
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000468// NewEvalTool creates a tool for evaluating JavaScript
469func (b *BrowseTools) NewEvalTool() *llm.Tool {
470 return &llm.Tool{
471 Name: "browser_eval",
472 Description: "Evaluate JavaScript in the browser context",
473 InputSchema: json.RawMessage(`{
474 "type": "object",
475 "properties": {
476 "expression": {
477 "type": "string",
478 "description": "JavaScript expression to evaluate"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700479 },
480 "timeout": {
481 "type": "string",
482 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000483 }
484 },
485 "required": ["expression"]
486 }`),
487 Run: b.evalRun,
488 }
489}
490
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700491func (b *BrowseTools) evalRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000492 var input evalInput
493 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700494 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000495 }
496
497 browserCtx, err := b.GetBrowserContext()
498 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700499 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000500 }
501
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700502 // Create a timeout context for this operation
503 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
504 defer cancel()
505
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000506 var result any
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700507 err = chromedp.Run(timeoutCtx, chromedp.Evaluate(input.Expression, &result))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000508 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700509 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000510 }
511
Josh Bleecher Snydercb557262025-06-30 23:55:20 +0000512 // Return the result as JSON
513 response, err := json.Marshal(result)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000514 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700515 return llm.ErrorfToolOut("failed to marshal response: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000516 }
517
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700518 return llm.ToolOut{LLMContent: llm.TextContent("<javascript_result>" + string(response) + "</javascript_result>")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000519}
520
// screenshotInput is the JSON payload accepted by the
// browser_take_screenshot tool.
type screenshotInput struct {
	// Selector optionally limits the screenshot to one element.
	Selector string `json:"selector,omitempty"`
	// Format is the requested output format ("base64" or "png").
	Format string `json:"format,omitempty"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
527
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000528// NewScreenshotTool creates a tool for taking screenshots
529func (b *BrowseTools) NewScreenshotTool() *llm.Tool {
530 return &llm.Tool{
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700531 Name: "browser_take_screenshot",
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000532 Description: "Take a screenshot of the page or a specific element",
533 InputSchema: json.RawMessage(`{
534 "type": "object",
535 "properties": {
536 "selector": {
537 "type": "string",
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -0700538 "description": "CSS selector for the element to screenshot (optional)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000539 },
540 "format": {
541 "type": "string",
542 "description": "Output format ('base64' or 'png'), defaults to 'base64'",
543 "enum": ["base64", "png"]
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700544 },
545 "timeout": {
546 "type": "string",
547 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000548 }
549 }
550 }`),
551 Run: b.screenshotRun,
552 }
553}
554
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700555func (b *BrowseTools) screenshotRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000556 var input screenshotInput
557 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700558 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000559 }
560
561 browserCtx, err := b.GetBrowserContext()
562 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700563 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000564 }
565
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700566 // Create a timeout context for this operation
567 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
568 defer cancel()
569
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000570 var buf []byte
571 var actions []chromedp.Action
572
573 if input.Selector != "" {
574 // Take screenshot of specific element
575 actions = append(actions,
576 chromedp.WaitReady(input.Selector),
577 chromedp.Screenshot(input.Selector, &buf, chromedp.NodeVisible),
578 )
579 } else {
580 // Take full page screenshot
581 actions = append(actions, chromedp.CaptureScreenshot(&buf))
582 }
583
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700584 err = chromedp.Run(timeoutCtx, actions...)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000585 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700586 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000587 }
588
Philip Zeyliger542bda32025-06-11 18:31:03 -0700589 // Save the screenshot and get its ID for potential future reference
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000590 id := b.SaveScreenshot(buf)
591 if id == "" {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700592 return llm.ErrorToolOut(fmt.Errorf("failed to save screenshot"))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000593 }
594
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700595 // Get the full path to the screenshot
596 screenshotPath := GetScreenshotPath(id)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000597
Philip Zeyliger542bda32025-06-11 18:31:03 -0700598 // Encode the image as base64
599 base64Data := base64.StdEncoding.EncodeToString(buf)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700600
Philip Zeyliger542bda32025-06-11 18:31:03 -0700601 // Return the screenshot directly to the LLM
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700602 return llm.ToolOut{LLMContent: []llm.Content{
Philip Zeyliger542bda32025-06-11 18:31:03 -0700603 {
604 Type: llm.ContentTypeText,
605 Text: fmt.Sprintf("Screenshot taken (saved as %s)", screenshotPath),
606 },
607 {
608 Type: llm.ContentTypeText, // Will be mapped to image in content array
609 MediaType: "image/png",
610 Data: base64Data,
611 },
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700612 }}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000613}
614
// scrollIntoViewInput is the JSON payload accepted by the
// browser_scroll_into_view tool.
type scrollIntoViewInput struct {
	// Selector is the CSS selector of the element to scroll to.
	Selector string `json:"selector"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
620
621// NewScrollIntoViewTool creates a tool for scrolling elements into view
622func (b *BrowseTools) NewScrollIntoViewTool() *llm.Tool {
623 return &llm.Tool{
624 Name: "browser_scroll_into_view",
625 Description: "Scroll an element into view if it's not visible",
626 InputSchema: json.RawMessage(`{
627 "type": "object",
628 "properties": {
629 "selector": {
630 "type": "string",
631 "description": "CSS selector for the element to scroll into view"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700632 },
633 "timeout": {
634 "type": "string",
635 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000636 }
637 },
638 "required": ["selector"]
639 }`),
640 Run: b.scrollIntoViewRun,
641 }
642}
643
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700644func (b *BrowseTools) scrollIntoViewRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000645 var input scrollIntoViewInput
646 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700647 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000648 }
649
650 browserCtx, err := b.GetBrowserContext()
651 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700652 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000653 }
654
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700655 // Create a timeout context for this operation
656 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
657 defer cancel()
658
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000659 script := fmt.Sprintf(`
660 const el = document.querySelector('%s');
661 if (el) {
662 el.scrollIntoView({behavior: 'smooth', block: 'center'});
663 return true;
664 }
665 return false;
666 `, input.Selector)
667
668 var result bool
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700669 err = chromedp.Run(timeoutCtx,
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000670 chromedp.WaitReady(input.Selector),
671 chromedp.Evaluate(script, &result),
672 )
673 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700674 return llm.ErrorToolOut(err)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000675 }
676
677 if !result {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700678 return llm.ErrorToolOut(fmt.Errorf("element not found: %s", input.Selector))
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000679 }
680
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700681 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000682}
683
// resizeInput is the JSON payload accepted by the browser_resize tool.
type resizeInput struct {
	// Width is the requested width in pixels.
	Width int `json:"width"`
	// Height is the requested height in pixels.
	Height int `json:"height"`
	// Timeout is an optional Go duration string bounding the operation.
	Timeout string `json:"timeout,omitempty"`
}
690
691// NewResizeTool creates a tool for resizing the browser window
692func (b *BrowseTools) NewResizeTool() *llm.Tool {
693 return &llm.Tool{
694 Name: "browser_resize",
695 Description: "Resize the browser window to a specific width and height",
696 InputSchema: json.RawMessage(`{
697 "type": "object",
698 "properties": {
699 "width": {
700 "type": "integer",
701 "description": "Window width in pixels"
702 },
703 "height": {
704 "type": "integer",
705 "description": "Window height in pixels"
706 },
707 "timeout": {
708 "type": "string",
709 "description": "Timeout as a Go duration string (default: 5s)"
710 }
711 },
712 "required": ["width", "height"]
713 }`),
714 Run: b.resizeRun,
715 }
716}
717
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700718func (b *BrowseTools) resizeRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger05224842025-05-10 18:26:08 -0700719 var input resizeInput
720 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700721 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700722 }
723
724 browserCtx, err := b.GetBrowserContext()
725 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700726 return llm.ErrorToolOut(err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700727 }
728
729 // Create a timeout context for this operation
730 timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
731 defer cancel()
732
733 // Validate dimensions
734 if input.Width <= 0 || input.Height <= 0 {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700735 return llm.ErrorToolOut(fmt.Errorf("invalid dimensions: width and height must be positive"))
Philip Zeyliger05224842025-05-10 18:26:08 -0700736 }
737
738 // Resize the browser window
739 err = chromedp.Run(timeoutCtx,
740 chromedp.EmulateViewport(int64(input.Width), int64(input.Height)),
741 )
742 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700743 return llm.ErrorToolOut(err)
Philip Zeyliger05224842025-05-10 18:26:08 -0700744 }
745
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700746 return llm.ToolOut{LLMContent: llm.TextContent("done")}
Philip Zeyliger05224842025-05-10 18:26:08 -0700747}
748
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700749// GetTools returns browser tools, optionally filtering out screenshot-related tools
750func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
751 tools := []*llm.Tool{
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000752 b.NewNavigateTool(),
753 b.NewClickTool(),
754 b.NewTypeTool(),
755 b.NewWaitForTool(),
756 b.NewGetTextTool(),
757 b.NewEvalTool(),
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000758 b.NewScrollIntoViewTool(),
Philip Zeyliger05224842025-05-10 18:26:08 -0700759 b.NewResizeTool(),
Philip Zeyliger18e33682025-05-13 16:34:21 -0700760 b.NewRecentConsoleLogsTool(),
761 b.NewClearConsoleLogsTool(),
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000762 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700763
764 // Add screenshot-related tools if supported
765 if includeScreenshotTools {
766 tools = append(tools, b.NewScreenshotTool())
767 tools = append(tools, b.NewReadImageTool())
768 }
769
770 return tools
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000771}
772
773// SaveScreenshot saves a screenshot to disk and returns its ID
774func (b *BrowseTools) SaveScreenshot(data []byte) string {
775 // Generate a unique ID
776 id := uuid.New().String()
777
778 // Save the file
779 filePath := filepath.Join(ScreenshotDir, id+".png")
Autoformatter4962f152025-05-06 17:24:20 +0000780 if err := os.WriteFile(filePath, data, 0o644); err != nil {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000781 log.Printf("Failed to save screenshot: %v", err)
782 return ""
783 }
784
785 // Track this screenshot
786 b.screenshotsMutex.Lock()
787 b.screenshots[id] = time.Now()
788 b.screenshotsMutex.Unlock()
789
790 return id
791}
792
793// GetScreenshotPath returns the full path to a screenshot by ID
794func GetScreenshotPath(id string) string {
795 return filepath.Join(ScreenshotDir, id+".png")
796}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700797
// readImageInput is the JSON payload accepted by the read_image tool.
type readImageInput struct {
	// Path is the location of the image file to read.
	Path string `json:"path"`
	// Timeout is an optional Go duration string.
	Timeout string `json:"timeout,omitempty"`
}
803
804// NewReadImageTool creates a tool for reading images and returning them as base64 encoded data
805func (b *BrowseTools) NewReadImageTool() *llm.Tool {
806 return &llm.Tool{
Philip Zeyliger542bda32025-06-11 18:31:03 -0700807 Name: "read_image",
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700808 Description: "Read an image file (such as a screenshot) and encode it for sending to the LLM",
809 InputSchema: json.RawMessage(`{
810 "type": "object",
811 "properties": {
812 "path": {
813 "type": "string",
814 "description": "Path to the image file to read"
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700815 },
816 "timeout": {
817 "type": "string",
818 "description": "Timeout as a Go duration string (default: 5s)"
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700819 }
820 },
821 "required": ["path"]
822 }`),
823 Run: b.readImageRun,
824 }
825}
826
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700827func (b *BrowseTools) readImageRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700828 var input readImageInput
829 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700830 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700831 }
832
833 // Check if the path exists
834 if _, err := os.Stat(input.Path); os.IsNotExist(err) {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700835 return llm.ErrorfToolOut("image file not found: %s", input.Path)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700836 }
837
838 // Read the file
839 imageData, err := os.ReadFile(input.Path)
840 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700841 return llm.ErrorfToolOut("failed to read image file: %w", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700842 }
843
844 // Detect the image type
845 imageType := http.DetectContentType(imageData)
846 if !strings.HasPrefix(imageType, "image/") {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700847 return llm.ErrorfToolOut("file is not an image: %s", imageType)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700848 }
849
850 // Encode the image as base64
851 base64Data := base64.StdEncoding.EncodeToString(imageData)
852
853 // Create a Content object that includes both text and the image
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700854 return llm.ToolOut{LLMContent: []llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700855 {
856 Type: llm.ContentTypeText,
857 Text: fmt.Sprintf("Image from %s (type: %s)", input.Path, imageType),
858 },
859 {
860 Type: llm.ContentTypeText, // Will be mapped to image in content array
861 MediaType: imageType,
862 Data: base64Data,
863 },
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700864 }}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700865}
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700866
// parseTimeout converts a Go duration string (for example "500ms" or "2s")
// into a time.Duration. It returns a default of 5 seconds when timeout is
// empty, cannot be parsed, or does not describe a positive duration.
func parseTimeout(timeout string) time.Duration {
	const fallback = 5 * time.Second

	// time.ParseDuration rejects the empty string, so the "not specified"
	// case takes the error branch as well.
	dur, err := time.ParseDuration(timeout)
	if err != nil {
		return fallback
	}

	// A zero or negative timeout would hand callers a context that is already
	// expired, so treat it like any other invalid value.
	if dur <= 0 {
		return fallback
	}

	return dur
}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700882
883// captureConsoleLog captures a console log event and stores it
884func (b *BrowseTools) captureConsoleLog(e *runtime.EventConsoleAPICalled) {
885 // Add to logs with mutex protection
886 b.consoleLogsMutex.Lock()
887 defer b.consoleLogsMutex.Unlock()
888
889 // Add the log and maintain max size
890 b.consoleLogs = append(b.consoleLogs, e)
891 if len(b.consoleLogs) > b.maxConsoleLogs {
892 b.consoleLogs = b.consoleLogs[len(b.consoleLogs)-b.maxConsoleLogs:]
893 }
894}
895
// recentConsoleLogsInput is the JSON payload accepted by the
// browser_recent_console_logs tool.
type recentConsoleLogsInput struct {
	// Limit is the maximum number of log entries to return.
	Limit int `json:"limit,omitempty"`
}
900
901// NewRecentConsoleLogsTool creates a tool for retrieving recent console logs
902func (b *BrowseTools) NewRecentConsoleLogsTool() *llm.Tool {
903 return &llm.Tool{
904 Name: "browser_recent_console_logs",
905 Description: "Get recent browser console logs",
906 InputSchema: json.RawMessage(`{
907 "type": "object",
908 "properties": {
909 "limit": {
910 "type": "integer",
911 "description": "Maximum number of log entries to return (default: 100)"
912 }
913 }
914 }`),
915 Run: b.recentConsoleLogsRun,
916 }
917}
918
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700919func (b *BrowseTools) recentConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger18e33682025-05-13 16:34:21 -0700920 var input recentConsoleLogsInput
921 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700922 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700923 }
924
925 // Ensure browser is initialized
926 _, err := b.GetBrowserContext()
927 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700928 return llm.ErrorToolOut(err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700929 }
930
931 // Apply limit (default to 100 if not specified)
932 limit := 100
933 if input.Limit > 0 {
934 limit = input.Limit
935 }
936
937 // Get console logs with mutex protection
938 b.consoleLogsMutex.Lock()
939 logs := make([]*runtime.EventConsoleAPICalled, 0, len(b.consoleLogs))
940 start := 0
941 if len(b.consoleLogs) > limit {
942 start = len(b.consoleLogs) - limit
943 }
944 logs = append(logs, b.consoleLogs[start:]...)
945 b.consoleLogsMutex.Unlock()
946
947 // Format the logs as JSON
948 logData, err := json.MarshalIndent(logs, "", " ")
949 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700950 return llm.ErrorfToolOut("failed to serialize logs: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700951 }
952
953 // Format the logs
954 var sb strings.Builder
955 sb.WriteString(fmt.Sprintf("Retrieved %d console log entries:\n\n", len(logs)))
956
957 if len(logs) == 0 {
958 sb.WriteString("No console logs captured.")
959 } else {
960 // Add the JSON data for full details
961 sb.WriteString(string(logData))
962 }
963
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700964 return llm.ToolOut{LLMContent: llm.TextContent(sb.String())}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700965}
966
// clearConsoleLogsInput is the input type decoded by the
// browser_clear_console_logs tool. It has no fields.
type clearConsoleLogsInput struct{}
969
970// NewClearConsoleLogsTool creates a tool for clearing console logs
971func (b *BrowseTools) NewClearConsoleLogsTool() *llm.Tool {
972 return &llm.Tool{
973 Name: "browser_clear_console_logs",
974 Description: "Clear all captured browser console logs",
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -0700975 InputSchema: llm.EmptySchema(),
976 Run: b.clearConsoleLogsRun,
Philip Zeyliger18e33682025-05-13 16:34:21 -0700977 }
978}
979
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700980func (b *BrowseTools) clearConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Philip Zeyliger18e33682025-05-13 16:34:21 -0700981 var input clearConsoleLogsInput
982 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700983 return llm.ErrorfToolOut("invalid input: %w", err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700984 }
985
986 // Ensure browser is initialized
987 _, err := b.GetBrowserContext()
988 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700989 return llm.ErrorToolOut(err)
Philip Zeyliger18e33682025-05-13 16:34:21 -0700990 }
991
992 // Clear console logs with mutex protection
993 b.consoleLogsMutex.Lock()
994 logCount := len(b.consoleLogs)
995 b.consoleLogs = make([]*runtime.EventConsoleAPICalled, 0)
996 b.consoleLogsMutex.Unlock()
997
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700998 return llm.ToolOut{LLMContent: llm.TextContent(fmt.Sprintf("Cleared %d console log entries.", logCount))}
Philip Zeyliger18e33682025-05-13 16:34:21 -0700999}