blob: d417a06858f968fd1ed7f42289be577385b5c60a [file] [log] [blame]
Philip Zeyliger33d282f2025-05-03 04:01:54 +00001package browse
2
3import (
4 "context"
5 "encoding/json"
Philip Zeyliger72252cb2025-05-10 17:00:08 -07006 "fmt"
Philip Zeyliger33d282f2025-05-03 04:01:54 +00007 "os"
Philip Zeyliger72252cb2025-05-10 17:00:08 -07008 "path/filepath"
Philip Zeyliger33d282f2025-05-03 04:01:54 +00009 "slices"
10 "strings"
11 "testing"
12 "time"
13
14 "github.com/chromedp/chromedp"
Philip Zeyliger05224842025-05-10 18:26:08 -070015 "github.com/stretchr/testify/require"
Philip Zeyliger33d282f2025-05-03 04:01:54 +000016 "sketch.dev/llm"
17)
18
19func TestToolCreation(t *testing.T) {
20 // Create browser tools instance
21 tools := NewBrowseTools(context.Background())
22
23 // Test each tool has correct name and description
24 toolTests := []struct {
25 tool *llm.Tool
26 expectedName string
27 shortDesc string
28 requiredProps []string
29 }{
30 {tools.NewNavigateTool(), "browser_navigate", "Navigate", []string{"url"}},
31 {tools.NewClickTool(), "browser_click", "Click", []string{"selector"}},
32 {tools.NewTypeTool(), "browser_type", "Type", []string{"selector", "text"}},
33 {tools.NewWaitForTool(), "browser_wait_for", "Wait", []string{"selector"}},
34 {tools.NewGetTextTool(), "browser_get_text", "Get", []string{"selector"}},
35 {tools.NewEvalTool(), "browser_eval", "Evaluate", []string{"expression"}},
Philip Zeyliger80b488d2025-05-10 18:21:54 -070036 {tools.NewScreenshotTool(), "browser_take_screenshot", "Take", nil},
Philip Zeyliger33d282f2025-05-03 04:01:54 +000037 {tools.NewScrollIntoViewTool(), "browser_scroll_into_view", "Scroll", []string{"selector"}},
38 }
39
40 for _, tt := range toolTests {
41 t.Run(tt.expectedName, func(t *testing.T) {
42 if tt.tool.Name != tt.expectedName {
43 t.Errorf("expected name %q, got %q", tt.expectedName, tt.tool.Name)
44 }
45
46 if !strings.Contains(tt.tool.Description, tt.shortDesc) {
47 t.Errorf("description %q should contain %q", tt.tool.Description, tt.shortDesc)
48 }
49
50 // Verify schema has required properties
51 if len(tt.requiredProps) > 0 {
52 var schema struct {
53 Required []string `json:"required"`
54 }
55 if err := json.Unmarshal(tt.tool.InputSchema, &schema); err != nil {
56 t.Fatalf("failed to unmarshal schema: %v", err)
57 }
58
59 for _, prop := range tt.requiredProps {
60 if !slices.Contains(schema.Required, prop) {
61 t.Errorf("property %q should be required", prop)
62 }
63 }
64 }
65 })
66 }
67}
68
Philip Zeyliger72252cb2025-05-10 17:00:08 -070069func TestGetTools(t *testing.T) {
Philip Zeyliger33d282f2025-05-03 04:01:54 +000070 // Create browser tools instance
71 tools := NewBrowseTools(context.Background())
72
Philip Zeyliger72252cb2025-05-10 17:00:08 -070073 // Test with screenshot tools included
74 t.Run("with screenshots", func(t *testing.T) {
75 toolsWithScreenshots := tools.GetTools(true)
Philip Zeyliger18e33682025-05-13 16:34:21 -070076 if len(toolsWithScreenshots) != 12 {
77 t.Errorf("expected 12 tools with screenshots, got %d", len(toolsWithScreenshots))
Philip Zeyliger33d282f2025-05-03 04:01:54 +000078 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -070079
80 // Check tool naming convention
81 for _, tool := range toolsWithScreenshots {
82 if !strings.HasPrefix(tool.Name, "browser_") {
83 t.Errorf("tool name %q does not have prefix 'browser_'", tool.Name)
84 }
85 }
86 })
87
88 // Test without screenshot tools
89 t.Run("without screenshots", func(t *testing.T) {
90 noScreenshotTools := tools.GetTools(false)
Philip Zeyliger18e33682025-05-13 16:34:21 -070091 if len(noScreenshotTools) != 10 {
92 t.Errorf("expected 10 tools without screenshots, got %d", len(noScreenshotTools))
Philip Zeyliger72252cb2025-05-10 17:00:08 -070093 }
94 })
Philip Zeyliger33d282f2025-05-03 04:01:54 +000095}
96
97// TestBrowserInitialization verifies that the browser can start correctly
98func TestBrowserInitialization(t *testing.T) {
99 // Skip long tests in short mode
100 if testing.Short() {
101 t.Skip("skipping browser initialization test in short mode")
102 }
103
104 // Create browser tools instance
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700105 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000106 defer cancel()
107
108 tools := NewBrowseTools(ctx)
109
110 // Initialize the browser
111 err := tools.Initialize()
112 if err != nil {
113 // If browser automation is not available, skip the test
114 if strings.Contains(err.Error(), "browser automation not available") {
115 t.Skip("Browser automation not available in this environment")
116 } else {
117 t.Fatalf("Failed to initialize browser: %v", err)
118 }
119 }
120
121 // Clean up
122 defer tools.Close()
123
124 // Get browser context to verify it's working
125 browserCtx, err := tools.GetBrowserContext()
126 if err != nil {
127 t.Fatalf("Failed to get browser context: %v", err)
128 }
129
130 // Try to navigate to a simple page
131 var title string
132 err = chromedp.Run(browserCtx,
133 chromedp.Navigate("about:blank"),
134 chromedp.Title(&title),
135 )
136 if err != nil {
137 t.Fatalf("Failed to navigate to about:blank: %v", err)
138 }
139
140 t.Logf("Successfully navigated to about:blank, title: %q", title)
141}
142
143// TestNavigateTool verifies that the navigate tool works correctly
144func TestNavigateTool(t *testing.T) {
145 // Skip long tests in short mode
146 if testing.Short() {
147 t.Skip("skipping navigate tool test in short mode")
148 }
149
150 // Create browser tools instance
Philip Zeyliger80b488d2025-05-10 18:21:54 -0700151 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000152 defer cancel()
153
154 tools := NewBrowseTools(ctx)
155 defer tools.Close()
156
157 // Check if browser initialization works
158 if err := tools.Initialize(); err != nil {
159 if strings.Contains(err.Error(), "browser automation not available") {
160 t.Skip("Browser automation not available in this environment")
161 }
162 }
163
164 // Get the navigate tool
165 navTool := tools.NewNavigateTool()
166
167 // Create input for the navigate tool
168 input := map[string]string{"url": "https://example.com"}
169 inputJSON, _ := json.Marshal(input)
170
171 // Call the tool
172 result, err := navTool.Run(ctx, json.RawMessage(inputJSON))
173 if err != nil {
174 t.Fatalf("Error running navigate tool: %v", err)
175 }
176
177 // Verify the response is successful
178 var response struct {
179 Status string `json:"status"`
180 Error string `json:"error,omitempty"`
181 }
182
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700183 resultText := result[0].Text
184 if err := json.Unmarshal([]byte(resultText), &response); err != nil {
Philip Zeyliger33d282f2025-05-03 04:01:54 +0000185 t.Fatalf("Error unmarshaling response: %v", err)
186 }
187
188 if response.Status != "success" {
189 // If browser automation is not available, skip the test
190 if strings.Contains(response.Error, "browser automation not available") {
191 t.Skip("Browser automation not available in this environment")
192 } else {
193 t.Errorf("Expected status 'success', got '%s' with error: %s", response.Status, response.Error)
194 }
195 }
196
197 // Try to get the page title to verify the navigation worked
198 browserCtx, err := tools.GetBrowserContext()
199 if err != nil {
200 // If browser automation is not available, skip the test
201 if strings.Contains(err.Error(), "browser automation not available") {
202 t.Skip("Browser automation not available in this environment")
203 } else {
204 t.Fatalf("Failed to get browser context: %v", err)
205 }
206 }
207
208 var title string
209 err = chromedp.Run(browserCtx, chromedp.Title(&title))
210 if err != nil {
211 t.Fatalf("Failed to get page title: %v", err)
212 }
213
214 t.Logf("Successfully navigated to example.com, title: %q", title)
215 if title != "Example Domain" {
216 t.Errorf("Expected title 'Example Domain', got '%s'", title)
217 }
218}
219
220// TestScreenshotTool tests that the screenshot tool properly saves files
221func TestScreenshotTool(t *testing.T) {
222 // Create browser tools instance
223 ctx := context.Background()
224 tools := NewBrowseTools(ctx)
225
226 // Test SaveScreenshot function directly
227 testData := []byte("test image data")
228 id := tools.SaveScreenshot(testData)
229 if id == "" {
230 t.Fatal("SaveScreenshot returned empty ID")
231 }
232
233 // Get the file path and check if the file exists
234 filePath := GetScreenshotPath(id)
235 _, err := os.Stat(filePath)
236 if err != nil {
237 t.Fatalf("Failed to find screenshot file: %v", err)
238 }
239
240 // Read the file contents
241 contents, err := os.ReadFile(filePath)
242 if err != nil {
243 t.Fatalf("Failed to read screenshot file: %v", err)
244 }
245
246 // Check the file contents
247 if string(contents) != string(testData) {
248 t.Errorf("File contents don't match: expected %q, got %q", string(testData), string(contents))
249 }
250
251 // Clean up the test file
252 os.Remove(filePath)
253}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700254
255func TestReadImageTool(t *testing.T) {
256 // Create a test BrowseTools instance
257 ctx := context.Background()
258 browseTools := NewBrowseTools(ctx)
259
260 // Create a test image
261 testDir := t.TempDir()
262 testImagePath := filepath.Join(testDir, "test_image.png")
263
264 // Create a small 1x1 black PNG image
265 smallPng := []byte{
266 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
267 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53,
268 0xDE, 0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, 0x54, 0x08, 0xD7, 0x63, 0x60, 0x00, 0x00, 0x00,
269 0x02, 0x00, 0x01, 0xE2, 0x21, 0xBC, 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44, 0xAE,
270 0x42, 0x60, 0x82,
271 }
272
273 // Write the test image
274 err := os.WriteFile(testImagePath, smallPng, 0o644)
275 if err != nil {
276 t.Fatalf("Failed to create test image: %v", err)
277 }
278
279 // Create the tool
280 readImageTool := browseTools.NewReadImageTool()
281
282 // Prepare input
283 input := fmt.Sprintf(`{"path": "%s"}`, testImagePath)
284
285 // Run the tool
286 result, err := readImageTool.Run(ctx, json.RawMessage(input))
287 if err != nil {
288 t.Fatalf("Read image tool failed: %v", err)
289 }
290
291 // In the updated code, result is already a []llm.Content
292 contents := result
293
294 // Check that we got at least two content objects
295 if len(contents) < 2 {
296 t.Fatalf("Expected at least 2 content objects, got %d", len(contents))
297 }
298
299 // Check that the second content has image data
300 if contents[1].MediaType == "" {
301 t.Errorf("Expected MediaType in second content")
302 }
303
304 if contents[1].Data == "" {
305 t.Errorf("Expected Data in second content")
306 }
307}
Philip Zeyliger05224842025-05-10 18:26:08 -0700308
309// TestResizeTool tests the browser resize functionality
310func TestResizeTool(t *testing.T) {
311 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
312 defer cancel()
313
314 // Skip if CI or headless testing environment
315 if os.Getenv("CI") != "" || os.Getenv("HEADLESS_TEST") != "" {
316 t.Skip("Skipping browser test in CI/headless environment")
317 }
318
319 t.Run("ResizeWindow", func(t *testing.T) {
320 tools := NewBrowseTools(ctx)
321 defer tools.Close()
322
323 // Resize to mobile dimensions
324 resizeTool := tools.NewResizeTool()
325 input := json.RawMessage(`{"width": 375, "height": 667}`)
326 content, err := resizeTool.Run(ctx, input)
327 require.NoError(t, err)
328 require.Contains(t, content[0].Text, "success")
329
330 // Navigate to a test page and verify using JavaScript to get window dimensions
331 navInput := json.RawMessage(`{"url": "https://example.com"}`)
332 content, err = tools.NewNavigateTool().Run(ctx, navInput)
333 require.NoError(t, err)
334 require.Contains(t, content[0].Text, "success")
335
336 // Check dimensions via JavaScript
337 evalInput := json.RawMessage(`{"expression": "({width: window.innerWidth, height: window.innerHeight})"}`)
338 content, err = tools.NewEvalTool().Run(ctx, evalInput)
339 require.NoError(t, err)
340
341 // The dimensions might not be exactly what we set (browser chrome, etc.)
342 // but they should be close
343 require.Contains(t, content[0].Text, "width")
344 require.Contains(t, content[0].Text, "height")
345 })
346}