// claudetool/browse/browse.go - sketch - Gitiles

 // Package browse provides browser automation tools for the agent
 package browse

 import (
 	"context"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"log"
 	"net/http"
 	"net/url"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"time"

 	"github.com/chromedp/cdproto/runtime"
 	"github.com/chromedp/chromedp"
 	"github.com/google/uuid"
 	"sketch.dev/llm"
 )

 // ScreenshotDir is the directory where screenshots are stored.
 // NewBrowseTools creates it (mode 0o755) and SaveScreenshot writes
 // "<id>.png" files into it.
 const ScreenshotDir = "/tmp/sketch-screenshots"

 // BrowseTools bundles all browser tools around one shared browser instance.
 type BrowseTools struct {
 	// ctx is derived from the context passed to NewBrowseTools; cancel ends it.
 	ctx    context.Context
 	cancel context.CancelFunc

 	// browserCtx and browserCtxCancel are populated by Initialize.
 	browserCtx       context.Context
 	browserCtxCancel context.CancelFunc

 	// mux is held for the duration of Initialize and Close.
 	mux         sync.Mutex
 	initOnce    sync.Once
 	initialized bool
 	initErr     error

 	// screenshotsMutex guards screenshots, which maps a screenshot ID to the
 	// time it was saved.
 	screenshotsMutex sync.Mutex
 	screenshots      map[string]time.Time

 	// consoleLogsMutex guards consoleLogs, which is trimmed to the newest
 	// maxConsoleLogs events.
 	consoleLogsMutex sync.Mutex
 	consoleLogs      []*runtime.EventConsoleAPICalled
 	maxConsoleLogs   int
 }

 // NewBrowseTools builds a new set of browser automation tools. The browser
 // itself is not started here; Initialize does that.
 func NewBrowseTools(ctx context.Context) *BrowseTools {
 	// Best effort: a failure to create the directory is only logged.
 	if mkErr := os.MkdirAll(ScreenshotDir, 0o755); mkErr != nil {
 		log.Printf("Failed to create screenshot directory: %v", mkErr)
 	}

 	toolsCtx, stop := context.WithCancel(ctx)
 	return &BrowseTools{
 		ctx:            toolsCtx,
 		cancel:         stop,
 		screenshots:    map[string]time.Time{},
 		consoleLogs:    []*runtime.EventConsoleAPICalled{},
 		maxConsoleLogs: 100,
 	}
 }

 // Initialize launches the shared browser on first use. The launch runs at
 // most once; every call returns the error (if any) recorded by that attempt.
 func (b *BrowseTools) Initialize() error {
 	b.mux.Lock()
 	defer b.mux.Unlock()

 	b.initOnce.Do(func() {
 		// Start from chromedp's defaults (its ExecPath lookup knows the common
 		// places to find Chrome) and layer our own options on top.
 		allocOpts := append(
 			chromedp.DefaultExecAllocatorOptions[:],
 			// Already the default when running as root, but containers generally
 			// need it even for non-root users (which is largely the case for tests).
 			chromedp.NoSandbox,
 			// This flag, or setting 'DBUS_SESSION_BUS_ADDRESS=""', lets tests pass
 			// in GitHub runner contexts; the failure without it is not clear.
 			chromedp.Flag("--disable-dbus", true),
 			// Startup can be pretty slow in tests.
 			chromedp.WSURLReadTimeout(60*time.Second),
 			// Marks the browser as a sketch internal process.
 			chromedp.Env("SKETCH_IGNORE_PORTS=1"),
 		)
 		// The allocator's own cancel func is discarded; allocCtx is derived
 		// from b.ctx, which Close cancels.
 		allocCtx, _ := chromedp.NewExecAllocator(b.ctx, allocOpts...)
 		tabCtx, tabCancel := chromedp.NewContext(
 			allocCtx,
 			chromedp.WithLogf(log.Printf),
 			chromedp.WithErrorf(log.Printf),
 			chromedp.WithBrowserOption(chromedp.WithDialTimeout(60*time.Second)),
 		)
 		b.browserCtx, b.browserCtxCancel = tabCtx, tabCancel

 		// Record console API events as they arrive.
 		chromedp.ListenTarget(tabCtx, func(ev any) {
 			if consoleEv, ok := ev.(*runtime.EventConsoleAPICalled); ok {
 				b.captureConsoleLog(consoleEv)
 			}
 		})

 		// A Run with no actions makes sure the browser actually starts.
 		startErr := chromedp.Run(tabCtx)
 		if startErr != nil {
 			b.initErr = fmt.Errorf("failed to start browser (please apt get chromium or equivalent): %w", startErr)
 			return
 		}

 		// Default viewport is 1280x720 (16:9 widescreen).
 		viewportErr := chromedp.Run(tabCtx, chromedp.EmulateViewport(1280, 720))
 		if viewportErr != nil {
 			b.initErr = fmt.Errorf("failed to set default viewport: %w", viewportErr)
 			return
 		}

 		b.initialized = true
 	})

 	return b.initErr
 }

 // Close cancels the browser context (when one exists), then the BrowseTools
 // context, and marks the tools as no longer initialized.
 func (b *BrowseTools) Close() {
 	b.mux.Lock()
 	defer b.mux.Unlock()

 	if stopBrowser := b.browserCtxCancel; stopBrowser != nil {
 		b.browserCtxCancel = nil
 		stopBrowser()
 	}
 	if stopAll := b.cancel; stopAll != nil {
 		stopAll()
 	}

 	b.initialized = false
 	log.Println("Browser closed")
 }

 // GetBrowserContext makes sure Initialize has run and hands back the shared
 // browser context, or the initialization error.
 func (b *BrowseTools) GetBrowserContext() (context.Context, error) {
 	initErr := b.Initialize()
 	if initErr != nil {
 		return nil, initErr
 	}
 	return b.browserCtx, nil
 }

 // navigateInput is the JSON input of the browser_navigate tool.
 type navigateInput struct {
 	// URL is the address to navigate to.
 	URL     string `json:"url"`
 	// Timeout is an optional Go duration string; navigateRun passes it to parseTimeout.
 	Timeout string `json:"timeout,omitempty"`
 }

 // isPort80 reports whether urlStr definitely uses port 80: either the port
 // is spelled out as "80", or no port is given and the scheme is "http".
 // An unparsable URL yields false.
 func isPort80(urlStr string) bool {
 	u, err := url.Parse(urlStr)
 	if err != nil {
 		return false
 	}
 	switch u.Port() {
 	case "80":
 		return true
 	case "":
 		// No explicit port: only plain http implies port 80.
 		return u.Scheme == "http"
 	default:
 		return false
 	}
 }

 // NewNavigateTool returns the "browser_navigate" tool. Its input is a
 // required url plus an optional timeout, and navigateRun executes it.
 func (b *BrowseTools) NewNavigateTool() *llm.Tool {
 	// JSON Schema for the tool input.
 	const schema = `{
 			"type": "object",
 			"properties": {
 				"url": {
 					"type": "string",
 					"description": "The URL to navigate to"
 				},
 				"timeout": {
 					"type": "string",
 					"description": "Timeout as a Go duration string (default: 15s)"
 				}
 			},
 			"required": ["url"]
 		}`
 	return &llm.Tool{
 		Name:        "browser_navigate",
 		Description: "Navigate the browser to a specific URL and wait for page to load",
 		InputSchema: json.RawMessage(schema),
 		Run:         b.navigateRun,
 	}
 }

 // navigateRun implements browser_navigate: it decodes the input, refuses
 // port-80 URLs, then navigates and waits for the page body to be ready.
 // The ctx argument is not used; the deadline hangs off the browser context.
 func (b *BrowseTools) navigateRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	var req navigateInput
 	if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
 		return llm.ErrorfToolOut("invalid input: %w", decodeErr)
 	}

 	if isPort80(req.URL) {
 		return llm.ErrorToolOut(fmt.Errorf("port 80 is not the port you're looking for--port 80 is the main sketch server"))
 	}

 	browserCtx, err := b.GetBrowserContext()
 	if err != nil {
 		return llm.ErrorToolOut(err)
 	}

 	// Bound the whole navigation by the requested (or default) timeout.
 	opCtx, release := context.WithTimeout(browserCtx, parseTimeout(req.Timeout))
 	defer release()

 	steps := []chromedp.Action{
 		chromedp.Navigate(req.URL),
 		chromedp.WaitReady("body"),
 	}
 	if runErr := chromedp.Run(opCtx, steps...); runErr != nil {
 		return llm.ErrorToolOut(runErr)
 	}

 	return llm.ToolOut{LLMContent: llm.TextContent("done")}
 }

 // evalInput is the JSON input of the browser_eval tool.
 type evalInput struct {
 	// Expression is the JavaScript to evaluate.
 	Expression string `json:"expression"`
 	// Timeout is an optional Go duration string; evalRun passes it to parseTimeout.
 	Timeout    string `json:"timeout,omitempty"`
 	// Await is a pointer so that "absent" can be told apart from false;
 	// evalRun treats nil as true.
 	Await      *bool  `json:"await,omitempty"`
 }

 // NewEvalTool returns the "browser_eval" tool. Its input is a required
 // expression plus optional timeout and await, and evalRun executes it.
 func (b *BrowseTools) NewEvalTool() *llm.Tool {
 	// JSON Schema for the tool input.
 	const schema = `{
 			"type": "object",
 			"properties": {
 				"expression": {
 					"type": "string",
 					"description": "JavaScript expression to evaluate"
 				},
 				"timeout": {
 					"type": "string",
 					"description": "Timeout as a Go duration string (default: 15s)"
 				},
 				"await": {
 					"type": "boolean",
 					"description": "If true, wait for promises to resolve and return their resolved value (default: true)"
 				}
 			},
 			"required": ["expression"]
 		}`
 	return &llm.Tool{
 		Name: "browser_eval",
 		Description: `Evaluate JavaScript in the browser context.
Your go-to tool for interacting with content: clicking buttons, typing, getting content, scrolling, resizing, waiting for content/selector to be ready, etc.`,
 		InputSchema: json.RawMessage(schema),
 		Run:         b.evalRun,
 	}
 }

 // evalRun implements browser_eval: it evaluates the expression in the page
 // and returns the JSON-encoded result wrapped in <javascript_result> tags.
 func (b *BrowseTools) evalRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	var req evalInput
 	if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
 		return llm.ErrorfToolOut("invalid input: %w", decodeErr)
 	}

 	browserCtx, err := b.GetBrowserContext()
 	if err != nil {
 		return llm.ErrorToolOut(err)
 	}

 	// Bound the evaluation by the requested (or default) timeout.
 	opCtx, release := context.WithTimeout(browserCtx, parseTimeout(req.Timeout))
 	defer release()

 	// Promises are awaited unless the caller explicitly sent await=false.
 	var evalOpts []chromedp.EvaluateOption
 	if req.Await == nil || *req.Await {
 		evalOpts = append(evalOpts, func(p *runtime.EvaluateParams) *runtime.EvaluateParams {
 			return p.WithAwaitPromise(true)
 		})
 	}

 	var value any
 	if runErr := chromedp.Run(opCtx, chromedp.Evaluate(req.Expression, &value, evalOpts...)); runErr != nil {
 		return llm.ErrorToolOut(runErr)
 	}

 	// Hand the value back as JSON.
 	encoded, err := json.Marshal(value)
 	if err != nil {
 		return llm.ErrorfToolOut("failed to marshal response: %w", err)
 	}

 	return llm.ToolOut{LLMContent: llm.TextContent("<javascript_result>" + string(encoded) + "</javascript_result>")}
 }

 // screenshotInput is the JSON input of the browser_take_screenshot tool.
 type screenshotInput struct {
 	// Selector is an optional CSS selector; when empty, screenshotRun
 	// captures without targeting an element.
 	Selector string `json:"selector,omitempty"`
 	// Timeout is an optional Go duration string; screenshotRun passes it to parseTimeout.
 	Timeout  string `json:"timeout,omitempty"`
 }

 // NewScreenshotTool returns the "browser_take_screenshot" tool. Both input
 // fields (selector, timeout) are optional, and screenshotRun executes it.
 func (b *BrowseTools) NewScreenshotTool() *llm.Tool {
 	// JSON Schema for the tool input.
 	const schema = `{
 			"type": "object",
 			"properties": {
 				"selector": {
 					"type": "string",
 					"description": "CSS selector for the element to screenshot (optional)"
 				},
 				"timeout": {
 					"type": "string",
 					"description": "Timeout as a Go duration string (default: 15s)"
 				}
 			}
 		}`
 	return &llm.Tool{
 		Name:        "browser_take_screenshot",
 		Description: "Take a screenshot of the page or a specific element",
 		InputSchema: json.RawMessage(schema),
 		Run:         b.screenshotRun,
 	}
 }

 // screenshotRun implements browser_take_screenshot: it captures a PNG,
 // saves it under ScreenshotDir and returns the saved path together with the
 // base64-encoded image.
 func (b *BrowseTools) screenshotRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	var req screenshotInput
 	if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
 		return llm.ErrorfToolOut("invalid input: %w", decodeErr)
 	}

 	browserCtx, err := b.GetBrowserContext()
 	if err != nil {
 		return llm.ErrorToolOut(err)
 	}

 	// Bound the capture by the requested (or default) timeout.
 	opCtx, release := context.WithTimeout(browserCtx, parseTimeout(req.Timeout))
 	defer release()

 	var png []byte
 	var steps []chromedp.Action
 	if sel := req.Selector; sel == "" {
 		// No selector: capture via chromedp.CaptureScreenshot.
 		steps = []chromedp.Action{chromedp.CaptureScreenshot(&png)}
 	} else {
 		// Selector given: wait for the element, then capture just that element.
 		steps = []chromedp.Action{
 			chromedp.WaitReady(sel),
 			chromedp.Screenshot(sel, &png, chromedp.NodeVisible),
 		}
 	}

 	if runErr := chromedp.Run(opCtx, steps...); runErr != nil {
 		return llm.ErrorToolOut(runErr)
 	}

 	// Persist the image; an empty ID signals that the write failed.
 	id := b.SaveScreenshot(png)
 	if id == "" {
 		return llm.ErrorToolOut(fmt.Errorf("failed to save screenshot"))
 	}

 	savedAt := GetScreenshotPath(id)
 	encoded := base64.StdEncoding.EncodeToString(png)

 	// Return the screenshot directly to the LLM.
 	return llm.ToolOut{LLMContent: []llm.Content{
 		{
 			Type: llm.ContentTypeText,
 			Text: fmt.Sprintf("Screenshot taken (saved as %s)", savedAt),
 		},
 		{
 			Type:      llm.ContentTypeText, // Will be mapped to image in content array
 			MediaType: "image/png",
 			Data:      encoded,
 		},
 	}}
 }

 // GetTools returns the navigate, eval and console-log tools, and appends the
 // screenshot and read-image tools when includeScreenshotTools is true.
 func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
 	base := []*llm.Tool{
 		b.NewNavigateTool(),
 		b.NewEvalTool(),
 		b.NewRecentConsoleLogsTool(),
 		b.NewClearConsoleLogsTool(),
 	}
 	if !includeScreenshotTools {
 		return base
 	}
 	return append(base, b.NewScreenshotTool(), b.NewReadImageTool())
 }

 // SaveScreenshot writes data to ScreenshotDir under a fresh UUID and returns
 // that ID. A failed write is logged and reported as an empty string.
 func (b *BrowseTools) SaveScreenshot(data []byte) string {
 	id := uuid.New().String()

 	if writeErr := os.WriteFile(GetScreenshotPath(id), data, 0o644); writeErr != nil {
 		log.Printf("Failed to save screenshot: %v", writeErr)
 		return ""
 	}

 	// Remember when this screenshot was taken.
 	b.screenshotsMutex.Lock()
 	defer b.screenshotsMutex.Unlock()
 	b.screenshots[id] = time.Now()

 	return id
 }

 // GetScreenshotPath maps a screenshot ID to its "<id>.png" file inside
 // ScreenshotDir.
 func GetScreenshotPath(id string) string {
 	fileName := id + ".png"
 	return filepath.Join(ScreenshotDir, fileName)
 }

 // readImageInput is the JSON input of the read_image tool.
 type readImageInput struct {
 	// Path is the image file to read.
 	Path    string `json:"path"`
 	// Timeout is accepted in the input, but readImageRun does not read it.
 	Timeout string `json:"timeout,omitempty"`
 }

 // NewReadImageTool returns the "read_image" tool, which reads an image file
 // and returns it base64-encoded. readImageRun executes it.
 func (b *BrowseTools) NewReadImageTool() *llm.Tool {
 	// JSON Schema for the tool input.
 	const schema = `{
 			"type": "object",
 			"properties": {
 				"path": {
 					"type": "string",
 					"description": "Path to the image file to read"
 				},
 				"timeout": {
 					"type": "string",
 					"description": "Timeout as a Go duration string (default: 15s)"
 				}
 			},
 			"required": ["path"]
 		}`
 	return &llm.Tool{
 		Name:        "read_image",
 		Description: "Read an image file (such as a screenshot) and encode it for sending to the LLM",
 		InputSchema: json.RawMessage(schema),
 		Run:         b.readImageRun,
 	}
 }

 // readImageRun implements read_image: it reads the file at the given path,
 // checks that its sniffed content type is an image, and returns a text
 // description plus the base64-encoded image data.
 //
 // Errors are returned as tool output when the input is not valid JSON, the
 // file does not exist or cannot be read, or the content is not an image.
 func (b *BrowseTools) readImageRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	var input readImageInput
 	if err := json.Unmarshal(m, &input); err != nil {
 		return llm.ErrorfToolOut("invalid input: %w", err)
 	}

 	// Read directly and classify the error afterwards. A separate os.Stat
 	// pre-check costs an extra syscall and can race with the read.
 	imageData, err := os.ReadFile(input.Path)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return llm.ErrorfToolOut("image file not found: %s", input.Path)
 		}
 		return llm.ErrorfToolOut("failed to read image file: %w", err)
 	}

 	// Sniff the content type from the file's bytes.
 	imageType := http.DetectContentType(imageData)
 	if !strings.HasPrefix(imageType, "image/") {
 		return llm.ErrorfToolOut("file is not an image: %s", imageType)
 	}

 	base64Data := base64.StdEncoding.EncodeToString(imageData)

 	// Return both a text description and the image itself.
 	return llm.ToolOut{LLMContent: []llm.Content{
 		{
 			Type: llm.ContentTypeText,
 			Text: fmt.Sprintf("Image from %s (type: %s)", input.Path, imageType),
 		},
 		{
 			Type:      llm.ContentTypeText, // Will be mapped to image in content array
 			MediaType: imageType,
 			Data:      base64Data,
 		},
 	}}
 }

 // parseTimeout converts a Go duration string such as "30s" into a
 // time.Duration. It returns the 15-second default when timeout is empty,
 // cannot be parsed, or is not positive; a zero or negative timeout would
 // produce a context that has already expired.
 func parseTimeout(timeout string) time.Duration {
 	const defaultTimeout = 15 * time.Second

 	dur, err := time.ParseDuration(timeout)
 	if err != nil || dur <= 0 {
 		return defaultTimeout
 	}
 	return dur
 }

 // captureConsoleLog appends a console event to the stored logs and drops
 // the oldest entries once more than maxConsoleLogs are held.
 func (b *BrowseTools) captureConsoleLog(e *runtime.EventConsoleAPICalled) {
 	b.consoleLogsMutex.Lock()
 	defer b.consoleLogsMutex.Unlock()

 	entries := append(b.consoleLogs, e)
 	if overflow := len(entries) - b.maxConsoleLogs; overflow > 0 {
 		// Keep only the newest maxConsoleLogs entries.
 		entries = entries[overflow:]
 	}
 	b.consoleLogs = entries
 }

 // recentConsoleLogsInput is the JSON input of the browser_recent_console_logs tool.
 type recentConsoleLogsInput struct {
 	// Limit caps the number of entries returned; recentConsoleLogsRun uses
 	// 100 when it is zero or negative.
 	Limit int `json:"limit,omitempty"`
 }

 // NewRecentConsoleLogsTool returns the "browser_recent_console_logs" tool,
 // which takes an optional limit and is executed by recentConsoleLogsRun.
 func (b *BrowseTools) NewRecentConsoleLogsTool() *llm.Tool {
 	// JSON Schema for the tool input.
 	const schema = `{
 			"type": "object",
 			"properties": {
 				"limit": {
 					"type": "integer",
 					"description": "Maximum number of log entries to return (default: 100)"
 				}
 			}
 		}`
 	return &llm.Tool{
 		Name:        "browser_recent_console_logs",
 		Description: "Get recent browser console logs",
 		InputSchema: json.RawMessage(schema),
 		Run:         b.recentConsoleLogsRun,
 	}
 }

 // recentConsoleLogsRun implements browser_recent_console_logs: it returns
 // the newest captured console events (up to the limit) as indented JSON
 // behind a one-line summary.
 func (b *BrowseTools) recentConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	var req recentConsoleLogsInput
 	if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
 		return llm.ErrorfToolOut("invalid input: %w", decodeErr)
 	}

 	// Make sure the browser is initialized.
 	if _, initErr := b.GetBrowserContext(); initErr != nil {
 		return llm.ErrorToolOut(initErr)
 	}

 	// A missing or non-positive limit falls back to 100.
 	limit := req.Limit
 	if limit <= 0 {
 		limit = 100
 	}

 	// Copy the tail of the log under the lock so it can be formatted without it.
 	b.consoleLogsMutex.Lock()
 	from := len(b.consoleLogs) - limit
 	if from < 0 {
 		from = 0
 	}
 	tail := b.consoleLogs[from:]
 	logs := make([]*runtime.EventConsoleAPICalled, len(tail))
 	copy(logs, tail)
 	b.consoleLogsMutex.Unlock()

 	logData, err := json.MarshalIndent(logs, "", "  ")
 	if err != nil {
 		return llm.ErrorfToolOut("failed to serialize logs: %w", err)
 	}

 	var sb strings.Builder
 	fmt.Fprintf(&sb, "Retrieved %d console log entries:\n\n", len(logs))
 	if len(logs) > 0 {
 		// Full details as JSON.
 		sb.Write(logData)
 	} else {
 		sb.WriteString("No console logs captured.")
 	}

 	return llm.ToolOut{LLMContent: llm.TextContent(sb.String())}
 }

 // clearConsoleLogsInput is the (empty) JSON input of the
 // browser_clear_console_logs tool.
 type clearConsoleLogsInput struct{}

 // NewClearConsoleLogsTool returns the "browser_clear_console_logs" tool,
 // which uses llm.EmptySchema as its input schema and is executed by
 // clearConsoleLogsRun.
 func (b *BrowseTools) NewClearConsoleLogsTool() *llm.Tool {
 	tool := &llm.Tool{
 		Name:        "browser_clear_console_logs",
 		Description: "Clear all captured browser console logs",
 		InputSchema: llm.EmptySchema(),
 	}
 	tool.Run = b.clearConsoleLogsRun
 	return tool
 }

 // clearConsoleLogsRun implements browser_clear_console_logs: it discards
 // all captured console events and reports how many were removed.
 func (b *BrowseTools) clearConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
 	// The input carries no fields; decoding only validates the JSON.
 	if decodeErr := json.Unmarshal(m, new(clearConsoleLogsInput)); decodeErr != nil {
 		return llm.ErrorfToolOut("invalid input: %w", decodeErr)
 	}

 	// Make sure the browser is initialized.
 	if _, initErr := b.GetBrowserContext(); initErr != nil {
 		return llm.ErrorToolOut(initErr)
 	}

 	b.consoleLogsMutex.Lock()
 	cleared := len(b.consoleLogs)
 	b.consoleLogs = []*runtime.EventConsoleAPICalled{}
 	b.consoleLogsMutex.Unlock()

 	return llm.ToolOut{LLMContent: llm.TextContent(fmt.Sprintf("Cleared %d console log entries.", cleared))}
 }
	// Package browse provides browser automation tools for the agent
	package browse

	import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/chromedp/cdproto/runtime"
	"github.com/chromedp/chromedp"
	"github.com/google/uuid"
	"sketch.dev/llm"
	)

	// ScreenshotDir is the directory where screenshots are stored.
	// NewBrowseTools creates it (mode 0o755) and SaveScreenshot writes
	// "<id>.png" files into it.
	const ScreenshotDir = "/tmp/sketch-screenshots"

	// BrowseTools bundles all browser tools around one shared browser instance.
	type BrowseTools struct {
		// ctx is derived from the context passed to NewBrowseTools; cancel ends it.
		ctx    context.Context
		cancel context.CancelFunc

		// browserCtx and browserCtxCancel are populated by Initialize.
		browserCtx       context.Context
		browserCtxCancel context.CancelFunc

		// mux is held for the duration of Initialize and Close.
		mux         sync.Mutex
		initOnce    sync.Once
		initialized bool
		initErr     error

		// screenshotsMutex guards screenshots, which maps a screenshot ID to the
		// time it was saved.
		screenshotsMutex sync.Mutex
		screenshots      map[string]time.Time

		// consoleLogsMutex guards consoleLogs, which is trimmed to the newest
		// maxConsoleLogs events.
		consoleLogsMutex sync.Mutex
		consoleLogs      []*runtime.EventConsoleAPICalled
		maxConsoleLogs   int
	}

	// NewBrowseTools builds a new set of browser automation tools. The browser
	// itself is not started here; Initialize does that.
	func NewBrowseTools(ctx context.Context) *BrowseTools {
		// Best effort: a failure to create the directory is only logged.
		if mkErr := os.MkdirAll(ScreenshotDir, 0o755); mkErr != nil {
			log.Printf("Failed to create screenshot directory: %v", mkErr)
		}

		toolsCtx, stop := context.WithCancel(ctx)
		return &BrowseTools{
			ctx:            toolsCtx,
			cancel:         stop,
			screenshots:    map[string]time.Time{},
			consoleLogs:    []*runtime.EventConsoleAPICalled{},
			maxConsoleLogs: 100,
		}
	}

	// Initialize launches the shared browser on first use. The launch runs at
	// most once; every call returns the error (if any) recorded by that attempt.
	func (b *BrowseTools) Initialize() error {
		b.mux.Lock()
		defer b.mux.Unlock()

		b.initOnce.Do(func() {
			// Start from chromedp's defaults (its ExecPath lookup knows the common
			// places to find Chrome) and layer our own options on top.
			allocOpts := append(
				chromedp.DefaultExecAllocatorOptions[:],
				// Already the default when running as root, but containers generally
				// need it even for non-root users (which is largely the case for tests).
				chromedp.NoSandbox,
				// This flag, or setting 'DBUS_SESSION_BUS_ADDRESS=""', lets tests pass
				// in GitHub runner contexts; the failure without it is not clear.
				chromedp.Flag("--disable-dbus", true),
				// Startup can be pretty slow in tests.
				chromedp.WSURLReadTimeout(60*time.Second),
				// Marks the browser as a sketch internal process.
				chromedp.Env("SKETCH_IGNORE_PORTS=1"),
			)
			// The allocator's own cancel func is discarded; allocCtx is derived
			// from b.ctx, which Close cancels.
			allocCtx, _ := chromedp.NewExecAllocator(b.ctx, allocOpts...)
			tabCtx, tabCancel := chromedp.NewContext(
				allocCtx,
				chromedp.WithLogf(log.Printf),
				chromedp.WithErrorf(log.Printf),
				chromedp.WithBrowserOption(chromedp.WithDialTimeout(60*time.Second)),
			)
			b.browserCtx, b.browserCtxCancel = tabCtx, tabCancel

			// Record console API events as they arrive.
			chromedp.ListenTarget(tabCtx, func(ev any) {
				if consoleEv, ok := ev.(*runtime.EventConsoleAPICalled); ok {
					b.captureConsoleLog(consoleEv)
				}
			})

			// A Run with no actions makes sure the browser actually starts.
			startErr := chromedp.Run(tabCtx)
			if startErr != nil {
				b.initErr = fmt.Errorf("failed to start browser (please apt get chromium or equivalent): %w", startErr)
				return
			}

			// Default viewport is 1280x720 (16:9 widescreen).
			viewportErr := chromedp.Run(tabCtx, chromedp.EmulateViewport(1280, 720))
			if viewportErr != nil {
				b.initErr = fmt.Errorf("failed to set default viewport: %w", viewportErr)
				return
			}

			b.initialized = true
		})

		return b.initErr
	}

	// Close cancels the browser context (when one exists), then the BrowseTools
	// context, and marks the tools as no longer initialized.
	func (b *BrowseTools) Close() {
		b.mux.Lock()
		defer b.mux.Unlock()

		if stopBrowser := b.browserCtxCancel; stopBrowser != nil {
			b.browserCtxCancel = nil
			stopBrowser()
		}
		if stopAll := b.cancel; stopAll != nil {
			stopAll()
		}

		b.initialized = false
		log.Println("Browser closed")
	}

	// GetBrowserContext makes sure Initialize has run and hands back the shared
	// browser context, or the initialization error.
	func (b *BrowseTools) GetBrowserContext() (context.Context, error) {
		initErr := b.Initialize()
		if initErr != nil {
			return nil, initErr
		}
		return b.browserCtx, nil
	}

	// navigateInput is the JSON input of the browser_navigate tool.
	type navigateInput struct {
	// URL is the address to navigate to.
	URL string `json:"url"`
	// Timeout is an optional Go duration string; navigateRun passes it to parseTimeout.
	Timeout string `json:"timeout,omitempty"`
	}

	// isPort80 reports whether urlStr definitely uses port 80: either the port
	// is spelled out as "80", or no port is given and the scheme is "http".
	// An unparsable URL yields false.
	func isPort80(urlStr string) bool {
		parsedURL, err := url.Parse(urlStr)
		if err != nil {
			return false
		}
		port := parsedURL.Port()
		// The escaped `\|\|` left by text extraction is not valid Go; the
		// operator is a plain logical OR.
		return port == "80" || (port == "" && parsedURL.Scheme == "http")
	}

	// NewNavigateTool returns the "browser_navigate" tool. Its input is a
	// required url plus an optional timeout, and navigateRun executes it.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewNavigateTool() *llm.Tool {
		return &llm.Tool{
			Name:        "browser_navigate",
			Description: "Navigate the browser to a specific URL and wait for page to load",
			InputSchema: json.RawMessage(`{
	"type": "object",
	"properties": {
	"url": {
	"type": "string",
	"description": "The URL to navigate to"
	},
	"timeout": {
	"type": "string",
	"description": "Timeout as a Go duration string (default: 15s)"
	}
	},
	"required": ["url"]
	}`),
			Run: b.navigateRun,
		}
	}

	// navigateRun implements browser_navigate: it decodes the input, refuses
	// port-80 URLs, then navigates and waits for the page body to be ready.
	// The ctx argument is not used; the deadline hangs off the browser context.
	func (b *BrowseTools) navigateRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		var req navigateInput
		if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
			return llm.ErrorfToolOut("invalid input: %w", decodeErr)
		}

		if isPort80(req.URL) {
			return llm.ErrorToolOut(fmt.Errorf("port 80 is not the port you're looking for--port 80 is the main sketch server"))
		}

		browserCtx, err := b.GetBrowserContext()
		if err != nil {
			return llm.ErrorToolOut(err)
		}

		// Bound the whole navigation by the requested (or default) timeout.
		opCtx, release := context.WithTimeout(browserCtx, parseTimeout(req.Timeout))
		defer release()

		steps := []chromedp.Action{
			chromedp.Navigate(req.URL),
			chromedp.WaitReady("body"),
		}
		if runErr := chromedp.Run(opCtx, steps...); runErr != nil {
			return llm.ErrorToolOut(runErr)
		}

		return llm.ToolOut{LLMContent: llm.TextContent("done")}
	}

	// evalInput is the JSON input of the browser_eval tool.
	type evalInput struct {
	// Expression is the JavaScript to evaluate.
	Expression string `json:"expression"`
	// Timeout is an optional Go duration string; evalRun passes it to parseTimeout.
	Timeout string `json:"timeout,omitempty"`
	// Await is a pointer so that "absent" can be told apart from false;
	// evalRun treats nil as true.
	Await *bool `json:"await,omitempty"`
	}

	// NewEvalTool returns the "browser_eval" tool. Its input is a required
	// expression plus optional timeout and await, and evalRun executes it.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewEvalTool() *llm.Tool {
		return &llm.Tool{
			Name: "browser_eval",
			Description: `Evaluate JavaScript in the browser context.
	Your go-to tool for interacting with content: clicking buttons, typing, getting content, scrolling, resizing, waiting for content/selector to be ready, etc.`,
			InputSchema: json.RawMessage(`{
	"type": "object",
	"properties": {
	"expression": {
	"type": "string",
	"description": "JavaScript expression to evaluate"
	},
	"timeout": {
	"type": "string",
	"description": "Timeout as a Go duration string (default: 15s)"
	},
	"await": {
	"type": "boolean",
	"description": "If true, wait for promises to resolve and return their resolved value (default: true)"
	}
	},
	"required": ["expression"]
	}`),
			Run: b.evalRun,
		}
	}

	// evalRun implements browser_eval: it evaluates the expression in the page
	// and returns the JSON-encoded result wrapped in <javascript_result> tags.
	func (b *BrowseTools) evalRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		var input evalInput
		if err := json.Unmarshal(m, &input); err != nil {
			return llm.ErrorfToolOut("invalid input: %w", err)
		}

		browserCtx, err := b.GetBrowserContext()
		if err != nil {
			return llm.ErrorToolOut(err)
		}

		// Bound the evaluation by the requested (or default) timeout.
		timeoutCtx, cancel := context.WithTimeout(browserCtx, parseTimeout(input.Timeout))
		defer cancel()

		var result any
		var evalOps []chromedp.EvaluateOption

		// Promises are awaited unless the caller explicitly sent await=false.
		await := true
		if input.Await != nil {
			await = *input.Await
		}
		if await {
			// The option takes and returns *runtime.EvaluateParams, which is
			// also what WithAwaitPromise returns.
			evalOps = append(evalOps, func(p *runtime.EvaluateParams) *runtime.EvaluateParams {
				return p.WithAwaitPromise(true)
			})
		}

		evalAction := chromedp.Evaluate(input.Expression, &result, evalOps...)

		err = chromedp.Run(timeoutCtx, evalAction)
		if err != nil {
			return llm.ErrorToolOut(err)
		}

		// Hand the value back as JSON.
		response, err := json.Marshal(result)
		if err != nil {
			return llm.ErrorfToolOut("failed to marshal response: %w", err)
		}

		return llm.ToolOut{LLMContent: llm.TextContent("<javascript_result>" + string(response) + "</javascript_result>")}
	}

	// screenshotInput is the JSON input of the browser_take_screenshot tool.
	type screenshotInput struct {
	// Selector is an optional CSS selector; when empty, screenshotRun
	// captures without targeting an element.
	Selector string `json:"selector,omitempty"`
	// Timeout is an optional Go duration string; screenshotRun passes it to parseTimeout.
	Timeout string `json:"timeout,omitempty"`
	}

	// NewScreenshotTool returns the "browser_take_screenshot" tool. Both input
	// fields (selector, timeout) are optional, and screenshotRun executes it.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewScreenshotTool() *llm.Tool {
		return &llm.Tool{
			Name:        "browser_take_screenshot",
			Description: "Take a screenshot of the page or a specific element",
			InputSchema: json.RawMessage(`{
	"type": "object",
	"properties": {
	"selector": {
	"type": "string",
	"description": "CSS selector for the element to screenshot (optional)"
	},
	"timeout": {
	"type": "string",
	"description": "Timeout as a Go duration string (default: 15s)"
	}
	}
	}`),
			Run: b.screenshotRun,
		}
	}

	// screenshotRun implements browser_take_screenshot: it captures a PNG,
	// saves it under ScreenshotDir and returns the saved path together with the
	// base64-encoded image.
	func (b *BrowseTools) screenshotRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		var req screenshotInput
		if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
			return llm.ErrorfToolOut("invalid input: %w", decodeErr)
		}

		browserCtx, err := b.GetBrowserContext()
		if err != nil {
			return llm.ErrorToolOut(err)
		}

		// Bound the capture by the requested (or default) timeout.
		opCtx, release := context.WithTimeout(browserCtx, parseTimeout(req.Timeout))
		defer release()

		var png []byte
		var steps []chromedp.Action
		if sel := req.Selector; sel == "" {
			// No selector: capture via chromedp.CaptureScreenshot.
			steps = []chromedp.Action{chromedp.CaptureScreenshot(&png)}
		} else {
			// Selector given: wait for the element, then capture just that element.
			steps = []chromedp.Action{
				chromedp.WaitReady(sel),
				chromedp.Screenshot(sel, &png, chromedp.NodeVisible),
			}
		}

		if runErr := chromedp.Run(opCtx, steps...); runErr != nil {
			return llm.ErrorToolOut(runErr)
		}

		// Persist the image; an empty ID signals that the write failed.
		id := b.SaveScreenshot(png)
		if id == "" {
			return llm.ErrorToolOut(fmt.Errorf("failed to save screenshot"))
		}

		savedAt := GetScreenshotPath(id)
		encoded := base64.StdEncoding.EncodeToString(png)

		// Return the screenshot directly to the LLM.
		return llm.ToolOut{LLMContent: []llm.Content{
			{
				Type: llm.ContentTypeText,
				Text: fmt.Sprintf("Screenshot taken (saved as %s)", savedAt),
			},
			{
				Type:      llm.ContentTypeText, // Will be mapped to image in content array
				MediaType: "image/png",
				Data:      encoded,
			},
		}}
	}

	// GetTools returns the navigate, eval and console-log tools, and appends the
	// screenshot and read-image tools when includeScreenshotTools is true.
	//
	// The result is []*llm.Tool, matching the slice built in the body.
	func (b *BrowseTools) GetTools(includeScreenshotTools bool) []*llm.Tool {
		tools := []*llm.Tool{
			b.NewNavigateTool(),
			b.NewEvalTool(),
			b.NewRecentConsoleLogsTool(),
			b.NewClearConsoleLogsTool(),
		}

		// Add screenshot-related tools if supported.
		if includeScreenshotTools {
			tools = append(tools, b.NewScreenshotTool(), b.NewReadImageTool())
		}

		return tools
	}

	// SaveScreenshot writes data to ScreenshotDir under a fresh UUID and returns
	// that ID. A failed write is logged and reported as an empty string.
	func (b *BrowseTools) SaveScreenshot(data []byte) string {
		id := uuid.New().String()

		if writeErr := os.WriteFile(GetScreenshotPath(id), data, 0o644); writeErr != nil {
			log.Printf("Failed to save screenshot: %v", writeErr)
			return ""
		}

		// Remember when this screenshot was taken.
		b.screenshotsMutex.Lock()
		defer b.screenshotsMutex.Unlock()
		b.screenshots[id] = time.Now()

		return id
	}

	// GetScreenshotPath maps a screenshot ID to its "<id>.png" file inside
	// ScreenshotDir.
	func GetScreenshotPath(id string) string {
		fileName := id + ".png"
		return filepath.Join(ScreenshotDir, fileName)
	}

	// readImageInput is the JSON input of the read_image tool.
	type readImageInput struct {
	// Path is the image file to read.
	Path string `json:"path"`
	// Timeout is accepted in the input, but readImageRun does not read it.
	Timeout string `json:"timeout,omitempty"`
	}

	// NewReadImageTool returns the "read_image" tool, which reads an image file
	// and returns it base64-encoded. readImageRun executes it.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewReadImageTool() *llm.Tool {
		return &llm.Tool{
			Name:        "read_image",
			Description: "Read an image file (such as a screenshot) and encode it for sending to the LLM",
			InputSchema: json.RawMessage(`{
	"type": "object",
	"properties": {
	"path": {
	"type": "string",
	"description": "Path to the image file to read"
	},
	"timeout": {
	"type": "string",
	"description": "Timeout as a Go duration string (default: 15s)"
	}
	},
	"required": ["path"]
	}`),
			Run: b.readImageRun,
		}
	}

	// readImageRun implements read_image: it reads the file at the given path,
	// checks that its sniffed content type is an image, and returns a text
	// description plus the base64-encoded image data.
	//
	// Errors are returned as tool output when the input is not valid JSON, the
	// file does not exist or cannot be read, or the content is not an image.
	func (b *BrowseTools) readImageRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		var input readImageInput
		if err := json.Unmarshal(m, &input); err != nil {
			return llm.ErrorfToolOut("invalid input: %w", err)
		}

		// Read directly and classify the error afterwards. A separate os.Stat
		// pre-check costs an extra syscall and can race with the read.
		imageData, err := os.ReadFile(input.Path)
		if err != nil {
			if os.IsNotExist(err) {
				return llm.ErrorfToolOut("image file not found: %s", input.Path)
			}
			return llm.ErrorfToolOut("failed to read image file: %w", err)
		}

		// Sniff the content type from the file's bytes.
		imageType := http.DetectContentType(imageData)
		if !strings.HasPrefix(imageType, "image/") {
			return llm.ErrorfToolOut("file is not an image: %s", imageType)
		}

		base64Data := base64.StdEncoding.EncodeToString(imageData)

		// Return both a text description and the image itself.
		return llm.ToolOut{LLMContent: []llm.Content{
			{
				Type: llm.ContentTypeText,
				Text: fmt.Sprintf("Image from %s (type: %s)", input.Path, imageType),
			},
			{
				Type:      llm.ContentTypeText, // Will be mapped to image in content array
				MediaType: imageType,
				Data:      base64Data,
			},
		}}
	}

	// parseTimeout converts a Go duration string such as "30s" into a
	// time.Duration. It returns the 15-second default when timeout is empty,
	// cannot be parsed, or is not positive; a zero or negative timeout would
	// produce a context that has already expired.
	func parseTimeout(timeout string) time.Duration {
		const defaultTimeout = 15 * time.Second

		dur, err := time.ParseDuration(timeout)
		if err != nil || dur <= 0 {
			return defaultTimeout
		}
		return dur
	}

	// captureConsoleLog appends a console event to the stored logs and drops
	// the oldest entries once more than maxConsoleLogs are held.
	//
	// The receiver must be a pointer: a value receiver would lock a copy of the
	// mutex and append to a copy of the slice. The event is a pointer because
	// consoleLogs stores *runtime.EventConsoleAPICalled.
	func (b *BrowseTools) captureConsoleLog(e *runtime.EventConsoleAPICalled) {
		b.consoleLogsMutex.Lock()
		defer b.consoleLogsMutex.Unlock()

		b.consoleLogs = append(b.consoleLogs, e)
		if len(b.consoleLogs) > b.maxConsoleLogs {
			// Keep only the newest maxConsoleLogs entries.
			b.consoleLogs = b.consoleLogs[len(b.consoleLogs)-b.maxConsoleLogs:]
		}
	}

	// recentConsoleLogsInput is the JSON input of the browser_recent_console_logs tool.
	type recentConsoleLogsInput struct {
	// Limit caps the number of entries returned; recentConsoleLogsRun uses
	// 100 when it is zero or negative.
	Limit int `json:"limit,omitempty"`
	}

	// NewRecentConsoleLogsTool returns the "browser_recent_console_logs" tool,
	// which takes an optional limit and is executed by recentConsoleLogsRun.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewRecentConsoleLogsTool() *llm.Tool {
		return &llm.Tool{
			Name:        "browser_recent_console_logs",
			Description: "Get recent browser console logs",
			InputSchema: json.RawMessage(`{
	"type": "object",
	"properties": {
	"limit": {
	"type": "integer",
	"description": "Maximum number of log entries to return (default: 100)"
	}
	}
	}`),
			Run: b.recentConsoleLogsRun,
		}
	}

	// recentConsoleLogsRun implements browser_recent_console_logs: it returns
	// the newest captured console events (up to the limit) as indented JSON
	// behind a one-line summary.
	func (b *BrowseTools) recentConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		var req recentConsoleLogsInput
		if decodeErr := json.Unmarshal(m, &req); decodeErr != nil {
			return llm.ErrorfToolOut("invalid input: %w", decodeErr)
		}

		// Make sure the browser is initialized.
		if _, initErr := b.GetBrowserContext(); initErr != nil {
			return llm.ErrorToolOut(initErr)
		}

		// A missing or non-positive limit falls back to 100.
		limit := req.Limit
		if limit <= 0 {
			limit = 100
		}

		// Copy the tail of the log under the lock so it can be formatted without it.
		b.consoleLogsMutex.Lock()
		from := len(b.consoleLogs) - limit
		if from < 0 {
			from = 0
		}
		tail := b.consoleLogs[from:]
		logs := make([]*runtime.EventConsoleAPICalled, len(tail))
		copy(logs, tail)
		b.consoleLogsMutex.Unlock()

		logData, err := json.MarshalIndent(logs, "", " ")
		if err != nil {
			return llm.ErrorfToolOut("failed to serialize logs: %w", err)
		}

		var sb strings.Builder
		fmt.Fprintf(&sb, "Retrieved %d console log entries:\n\n", len(logs))
		if len(logs) > 0 {
			// Full details as JSON.
			sb.Write(logData)
		} else {
			sb.WriteString("No console logs captured.")
		}

		return llm.ToolOut{LLMContent: llm.TextContent(sb.String())}
	}

	// clearConsoleLogsInput is the (empty) JSON input of the
	// browser_clear_console_logs tool.
	type clearConsoleLogsInput struct{}

	// NewClearConsoleLogsTool returns the "browser_clear_console_logs" tool,
	// which uses llm.EmptySchema as its input schema and is executed by
	// clearConsoleLogsRun.
	//
	// The receiver and result are pointers: the body returns &llm.Tool, and
	// BrowseTools holds mutexes that must not be copied.
	func (b *BrowseTools) NewClearConsoleLogsTool() *llm.Tool {
		return &llm.Tool{
			Name:        "browser_clear_console_logs",
			Description: "Clear all captured browser console logs",
			InputSchema: llm.EmptySchema(),
			Run:         b.clearConsoleLogsRun,
		}
	}

	// clearConsoleLogsRun implements browser_clear_console_logs: it discards
	// all captured console events and reports how many were removed.
	func (b *BrowseTools) clearConsoleLogsRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
		// The input carries no fields; decoding only validates the JSON.
		if decodeErr := json.Unmarshal(m, new(clearConsoleLogsInput)); decodeErr != nil {
			return llm.ErrorfToolOut("invalid input: %w", decodeErr)
		}

		// Make sure the browser is initialized.
		if _, initErr := b.GetBrowserContext(); initErr != nil {
			return llm.ErrorToolOut(initErr)
		}

		b.consoleLogsMutex.Lock()
		cleared := len(b.consoleLogs)
		b.consoleLogs = []*runtime.EventConsoleAPICalled{}
		b.consoleLogsMutex.Unlock()

		return llm.ToolOut{LLMContent: llm.TextContent(fmt.Sprintf("Cleared %d console log entries.", cleared))}
	}