| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 1 | package onstart |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| Philip Zeyliger | 209ea91 | 2025-06-09 21:54:12 +0000 | [diff] [blame] | 5 | "os" |
| 6 | "os/exec" |
| 7 | "path/filepath" |
| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 8 | "slices" |
| 9 | "testing" |
| 10 | ) |
| 11 | |
| 12 | func TestAnalyzeCodebase(t *testing.T) { |
| 13 | t.Run("Basic Analysis", func(t *testing.T) { |
| 14 | // Test basic functionality with regular ASCII filenames |
| 15 | codebase, err := AnalyzeCodebase(context.Background(), ".") |
| 16 | if err != nil { |
| 17 | t.Fatalf("AnalyzeCodebase failed: %v", err) |
| 18 | } |
| 19 | |
| 20 | if codebase == nil { |
| 21 | t.Fatal("Expected non-nil codebase") |
| 22 | } |
| 23 | |
| 24 | if codebase.TotalFiles == 0 { |
| 25 | t.Error("Expected some files to be analyzed") |
| 26 | } |
| 27 | |
| 28 | if len(codebase.ExtensionCounts) == 0 { |
| 29 | t.Error("Expected extension counts to be populated") |
| 30 | } |
| 31 | }) |
| 32 | |
| 33 | t.Run("Non-ASCII Filenames", func(t *testing.T) { |
| Philip Zeyliger | 209ea91 | 2025-06-09 21:54:12 +0000 | [diff] [blame] | 34 | // Create a temporary directory with unicode filenames for testing |
| 35 | tempDir := t.TempDir() |
| 36 | |
| 37 | // Initialize git repository |
| 38 | cmd := exec.Command("git", "init") |
| 39 | cmd.Dir = tempDir |
| 40 | if err := cmd.Run(); err != nil { |
| 41 | t.Fatalf("Failed to init git repo: %v", err) |
| 42 | } |
| 43 | |
| 44 | cmd = exec.Command("git", "config", "user.name", "Test User") |
| 45 | cmd.Dir = tempDir |
| 46 | if err := cmd.Run(); err != nil { |
| 47 | t.Fatalf("Failed to set git user.name: %v", err) |
| 48 | } |
| 49 | |
| 50 | cmd = exec.Command("git", "config", "user.email", "test@example.com") |
| 51 | cmd.Dir = tempDir |
| 52 | if err := cmd.Run(); err != nil { |
| 53 | t.Fatalf("Failed to set git user.email: %v", err) |
| 54 | } |
| 55 | |
| 56 | // Configure git to handle unicode filenames properly |
| 57 | cmd = exec.Command("git", "config", "core.quotepath", "false") |
| 58 | cmd.Dir = tempDir |
| 59 | if err := cmd.Run(); err != nil { |
| 60 | t.Fatalf("Failed to set git core.quotepath: %v", err) |
| 61 | } |
| 62 | |
| 63 | cmd = exec.Command("git", "config", "core.precomposeunicode", "true") |
| 64 | cmd.Dir = tempDir |
| 65 | if err := cmd.Run(); err != nil { |
| 66 | t.Fatalf("Failed to set git core.precomposeunicode: %v", err) |
| 67 | } |
| 68 | |
| 69 | // Create test files with unicode characters dynamically |
| 70 | testFiles := map[string]string{ |
| 71 | "测试文件.go": "// Package test with Chinese characters in filename\npackage test\n\nfunc TestFunction() {\n\t// This is a test file\n}", |
| 72 | "café.js": "// JavaScript file with French characters\nconsole.log('Hello from café!');", |
| 73 | "русский.py": "# Python file with Russian characters\nprint('Привет мир!')", |
| 74 | "🚀rocket.md": "# README with Emoji\n\nThis file has an emoji in the filename.", |
| 75 | "readme-español.md": "# Spanish README\n\nEste es un archivo de documentación.", |
| 76 | "Übung.html": "<!DOCTYPE html>\n<html><head><title>German Exercise</title></head><body><h1>Übung</h1></body></html>", |
| 77 | "Makefile-日本語": "# Japanese Makefile\nall:\n\techo 'Japanese makefile'", |
| 78 | } |
| 79 | |
| 80 | // Create subdirectory |
| 81 | subdir := filepath.Join(tempDir, "subdir") |
| 82 | err := os.MkdirAll(subdir, 0o755) |
| 83 | if err != nil { |
| 84 | t.Fatalf("Failed to create subdir: %v", err) |
| 85 | } |
| 86 | |
| 87 | // Add file in subdirectory |
| 88 | testFiles["subdir/claude.한국어.md"] = "# Korean Claude file\n\nThis is a guidance file with Korean characters." |
| 89 | |
| 90 | // Write all test files |
| 91 | for filename, content := range testFiles { |
| 92 | fullPath := filepath.Join(tempDir, filename) |
| 93 | dir := filepath.Dir(fullPath) |
| 94 | if dir != tempDir { |
| 95 | err := os.MkdirAll(dir, 0o755) |
| 96 | if err != nil { |
| 97 | t.Fatalf("Failed to create directory %s: %v", dir, err) |
| 98 | } |
| 99 | } |
| 100 | err := os.WriteFile(fullPath, []byte(content), 0o644) |
| 101 | if err != nil { |
| 102 | t.Fatalf("Failed to write file %s: %v", filename, err) |
| 103 | } |
| 104 | } |
| 105 | |
| 106 | // Add all files to git at once |
| 107 | cmd = exec.Command("git", "add", ".") |
| 108 | cmd.Dir = tempDir |
| 109 | if err := cmd.Run(); err != nil { |
| 110 | t.Fatalf("Failed to add files to git: %v", err) |
| 111 | } |
| 112 | |
| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 113 | // Test with non-ASCII characters in filenames |
| Philip Zeyliger | 209ea91 | 2025-06-09 21:54:12 +0000 | [diff] [blame] | 114 | codebase, err := AnalyzeCodebase(context.Background(), tempDir) |
| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 115 | if err != nil { |
| 116 | t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err) |
| 117 | } |
| 118 | |
| 119 | if codebase == nil { |
| 120 | t.Fatal("Expected non-nil codebase") |
| 121 | } |
| 122 | |
| Philip Zeyliger | 209ea91 | 2025-06-09 21:54:12 +0000 | [diff] [blame] | 123 | // We expect 8 files in our temp directory |
| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 124 | expectedFiles := 8 |
| 125 | if codebase.TotalFiles != expectedFiles { |
| 126 | t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles) |
| 127 | } |
| 128 | |
| 129 | // Verify extension counts include our non-ASCII files |
| 130 | expectedExtensions := map[string]int{ |
| 131 | ".go": 1, // 测试文件.go |
| 132 | ".js": 1, // café.js |
| 133 | ".py": 1, // русский.py |
| Philip Zeyliger | 209ea91 | 2025-06-09 21:54:12 +0000 | [diff] [blame] | 134 | ".md": 3, // 🚀rocket.md, readme-español.md, claude.한국어.md |
| Marc-Antoine Ruel | f1e517d | 2025-06-08 17:30:37 +0000 | [diff] [blame] | 135 | ".html": 1, // Übung.html |
| 136 | "<no-extension>": 1, // Makefile-日本語 |
| 137 | } |
| 138 | |
| 139 | for ext, expectedCount := range expectedExtensions { |
| 140 | actualCount, exists := codebase.ExtensionCounts[ext] |
| 141 | if !exists { |
| 142 | t.Errorf("Expected extension %s to be found", ext) |
| 143 | continue |
| 144 | } |
| 145 | if actualCount != expectedCount { |
| 146 | t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount) |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | // Verify file categorization works with non-ASCII filenames |
| 151 | // Check build files |
| 152 | if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") { |
| 153 | t.Error("Expected Makefile-日本語 to be categorized as a build file") |
| 154 | } |
| 155 | |
| 156 | // Check documentation files |
| 157 | if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") { |
| 158 | t.Error("Expected readme-español.md to be categorized as a documentation file") |
| 159 | } |
| 160 | |
| 161 | // Check guidance files |
| 162 | if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") { |
| 163 | t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file") |
| 164 | } |
| 165 | }) |
| 166 | } |
| 167 | |
| 168 | func TestCategorizeFile(t *testing.T) { |
| 169 | t.Run("Non-ASCII Filenames", func(t *testing.T) { |
| 170 | tests := []struct { |
| 171 | name string |
| 172 | path string |
| 173 | expected string |
| 174 | }{ |
| 175 | {"Chinese Go file", "测试文件.go", ""}, |
| 176 | {"French JS file", "café.js", ""}, |
| 177 | {"Russian Python file", "русский.py", ""}, |
| 178 | {"Emoji markdown file", "🚀rocket.md", ""}, |
| 179 | {"German HTML file", "Übung.html", ""}, |
| 180 | {"Japanese Makefile", "Makefile-日本語", "build"}, |
| 181 | {"Spanish README", "readme-español.md", "documentation"}, |
| 182 | {"Korean Claude file", "subdir/claude.한국어.md", "guidance"}, |
| 183 | // Test edge cases with Unicode normalization and combining characters |
| 184 | {"Mixed Unicode file", "test中文🚀.txt", ""}, |
| 185 | {"Combining characters", "filé̂.go", ""}, // file with combining acute and circumflex accents |
| 186 | {"Right-to-left script", "مرحبا.py", ""}, // Arabic "hello" |
| 187 | } |
| 188 | |
| 189 | for _, tt := range tests { |
| 190 | t.Run(tt.name, func(t *testing.T) { |
| 191 | result := categorizeFile(tt.path) |
| 192 | if result != tt.expected { |
| 193 | t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected) |
| 194 | } |
| 195 | }) |
| 196 | } |
| 197 | }) |
| 198 | } |
| 199 | |
| 200 | func TestTopExtensions(t *testing.T) { |
| 201 | t.Run("With Non-ASCII Files", func(t *testing.T) { |
| 202 | // Create a test codebase with known extension counts |
| 203 | codebase := &Codebase{ |
| 204 | ExtensionCounts: map[string]int{ |
| 205 | ".md": 5, // Most common |
| 206 | ".go": 3, |
| 207 | ".js": 2, |
| 208 | ".py": 1, |
| 209 | ".html": 1, // Least common |
| 210 | }, |
| 211 | TotalFiles: 12, |
| 212 | } |
| 213 | |
| 214 | topExt := codebase.TopExtensions() |
| 215 | if len(topExt) != 5 { |
| 216 | t.Errorf("Expected 5 top extensions, got %d", len(topExt)) |
| 217 | } |
| 218 | |
| 219 | // Check that extensions are sorted by count (descending) |
| 220 | expected := []string{ |
| 221 | ".md: 5 (42%)", |
| 222 | ".go: 3 (25%)", |
| 223 | ".js: 2 (17%)", |
| 224 | ".html: 1 (8%)", |
| 225 | ".py: 1 (8%)", |
| 226 | } |
| 227 | |
| 228 | for i, expectedExt := range expected { |
| 229 | if i >= len(topExt) { |
| 230 | t.Errorf("Missing expected extension at index %d: %s", i, expectedExt) |
| 231 | continue |
| 232 | } |
| 233 | if topExt[i] != expectedExt { |
| 234 | t.Errorf("Expected extension %q at index %d, got %q", expectedExt, i, topExt[i]) |
| 235 | } |
| 236 | } |
| 237 | }) |
| 238 | } |