blob: b3a0becde9451d9f27345d6170e9ab1fb0130d68 [file] [log] [blame]
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +00001package onstart
2
3import (
4 "context"
Philip Zeyliger209ea912025-06-09 21:54:12 +00005 "os"
6 "os/exec"
7 "path/filepath"
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +00008 "slices"
9 "testing"
10)
11
12func TestAnalyzeCodebase(t *testing.T) {
13 t.Run("Basic Analysis", func(t *testing.T) {
14 // Test basic functionality with regular ASCII filenames
15 codebase, err := AnalyzeCodebase(context.Background(), ".")
16 if err != nil {
17 t.Fatalf("AnalyzeCodebase failed: %v", err)
18 }
19
20 if codebase == nil {
21 t.Fatal("Expected non-nil codebase")
22 }
23
24 if codebase.TotalFiles == 0 {
25 t.Error("Expected some files to be analyzed")
26 }
27
28 if len(codebase.ExtensionCounts) == 0 {
29 t.Error("Expected extension counts to be populated")
30 }
31 })
32
33 t.Run("Non-ASCII Filenames", func(t *testing.T) {
Philip Zeyliger209ea912025-06-09 21:54:12 +000034 // Create a temporary directory with unicode filenames for testing
35 tempDir := t.TempDir()
36
37 // Initialize git repository
38 cmd := exec.Command("git", "init")
39 cmd.Dir = tempDir
40 if err := cmd.Run(); err != nil {
41 t.Fatalf("Failed to init git repo: %v", err)
42 }
43
44 cmd = exec.Command("git", "config", "user.name", "Test User")
45 cmd.Dir = tempDir
46 if err := cmd.Run(); err != nil {
47 t.Fatalf("Failed to set git user.name: %v", err)
48 }
49
50 cmd = exec.Command("git", "config", "user.email", "test@example.com")
51 cmd.Dir = tempDir
52 if err := cmd.Run(); err != nil {
53 t.Fatalf("Failed to set git user.email: %v", err)
54 }
55
56 // Configure git to handle unicode filenames properly
57 cmd = exec.Command("git", "config", "core.quotepath", "false")
58 cmd.Dir = tempDir
59 if err := cmd.Run(); err != nil {
60 t.Fatalf("Failed to set git core.quotepath: %v", err)
61 }
62
63 cmd = exec.Command("git", "config", "core.precomposeunicode", "true")
64 cmd.Dir = tempDir
65 if err := cmd.Run(); err != nil {
66 t.Fatalf("Failed to set git core.precomposeunicode: %v", err)
67 }
68
69 // Create test files with unicode characters dynamically
70 testFiles := map[string]string{
71 "测试文件.go": "// Package test with Chinese characters in filename\npackage test\n\nfunc TestFunction() {\n\t// This is a test file\n}",
72 "café.js": "// JavaScript file with French characters\nconsole.log('Hello from café!');",
73 "русский.py": "# Python file with Russian characters\nprint('Привет мир!')",
74 "🚀rocket.md": "# README with Emoji\n\nThis file has an emoji in the filename.",
75 "readme-español.md": "# Spanish README\n\nEste es un archivo de documentación.",
76 "Übung.html": "<!DOCTYPE html>\n<html><head><title>German Exercise</title></head><body><h1>Übung</h1></body></html>",
77 "Makefile-日本語": "# Japanese Makefile\nall:\n\techo 'Japanese makefile'",
78 }
79
80 // Create subdirectory
81 subdir := filepath.Join(tempDir, "subdir")
82 err := os.MkdirAll(subdir, 0o755)
83 if err != nil {
84 t.Fatalf("Failed to create subdir: %v", err)
85 }
86
87 // Add file in subdirectory
88 testFiles["subdir/claude.한국어.md"] = "# Korean Claude file\n\nThis is a guidance file with Korean characters."
89
90 // Write all test files
91 for filename, content := range testFiles {
92 fullPath := filepath.Join(tempDir, filename)
93 dir := filepath.Dir(fullPath)
94 if dir != tempDir {
95 err := os.MkdirAll(dir, 0o755)
96 if err != nil {
97 t.Fatalf("Failed to create directory %s: %v", dir, err)
98 }
99 }
100 err := os.WriteFile(fullPath, []byte(content), 0o644)
101 if err != nil {
102 t.Fatalf("Failed to write file %s: %v", filename, err)
103 }
104 }
105
106 // Add all files to git at once
107 cmd = exec.Command("git", "add", ".")
108 cmd.Dir = tempDir
109 if err := cmd.Run(); err != nil {
110 t.Fatalf("Failed to add files to git: %v", err)
111 }
112
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +0000113 // Test with non-ASCII characters in filenames
Philip Zeyliger209ea912025-06-09 21:54:12 +0000114 codebase, err := AnalyzeCodebase(context.Background(), tempDir)
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +0000115 if err != nil {
116 t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err)
117 }
118
119 if codebase == nil {
120 t.Fatal("Expected non-nil codebase")
121 }
122
Philip Zeyliger209ea912025-06-09 21:54:12 +0000123 // We expect 8 files in our temp directory
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +0000124 expectedFiles := 8
125 if codebase.TotalFiles != expectedFiles {
126 t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles)
127 }
128
129 // Verify extension counts include our non-ASCII files
130 expectedExtensions := map[string]int{
131 ".go": 1, // 测试文件.go
132 ".js": 1, // café.js
133 ".py": 1, // русский.py
Philip Zeyliger209ea912025-06-09 21:54:12 +0000134 ".md": 3, // 🚀rocket.md, readme-español.md, claude.한국어.md
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +0000135 ".html": 1, // Übung.html
136 "<no-extension>": 1, // Makefile-日本語
137 }
138
139 for ext, expectedCount := range expectedExtensions {
140 actualCount, exists := codebase.ExtensionCounts[ext]
141 if !exists {
142 t.Errorf("Expected extension %s to be found", ext)
143 continue
144 }
145 if actualCount != expectedCount {
146 t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount)
147 }
148 }
149
150 // Verify file categorization works with non-ASCII filenames
151 // Check build files
152 if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") {
153 t.Error("Expected Makefile-日本語 to be categorized as a build file")
154 }
155
156 // Check documentation files
157 if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") {
158 t.Error("Expected readme-español.md to be categorized as a documentation file")
159 }
160
161 // Check guidance files
162 if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") {
163 t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file")
164 }
165 })
166}
167
168func TestCategorizeFile(t *testing.T) {
169 t.Run("Non-ASCII Filenames", func(t *testing.T) {
170 tests := []struct {
171 name string
172 path string
173 expected string
174 }{
175 {"Chinese Go file", "测试文件.go", ""},
176 {"French JS file", "café.js", ""},
177 {"Russian Python file", "русский.py", ""},
178 {"Emoji markdown file", "🚀rocket.md", ""},
179 {"German HTML file", "Übung.html", ""},
180 {"Japanese Makefile", "Makefile-日本語", "build"},
181 {"Spanish README", "readme-español.md", "documentation"},
182 {"Korean Claude file", "subdir/claude.한국어.md", "guidance"},
183 // Test edge cases with Unicode normalization and combining characters
184 {"Mixed Unicode file", "test中文🚀.txt", ""},
185 {"Combining characters", "filé̂.go", ""}, // file with combining acute and circumflex accents
186 {"Right-to-left script", "مرحبا.py", ""}, // Arabic "hello"
187 }
188
189 for _, tt := range tests {
190 t.Run(tt.name, func(t *testing.T) {
191 result := categorizeFile(tt.path)
192 if result != tt.expected {
193 t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
194 }
195 })
196 }
197 })
198}
199
200func TestTopExtensions(t *testing.T) {
201 t.Run("With Non-ASCII Files", func(t *testing.T) {
202 // Create a test codebase with known extension counts
203 codebase := &Codebase{
204 ExtensionCounts: map[string]int{
205 ".md": 5, // Most common
206 ".go": 3,
207 ".js": 2,
208 ".py": 1,
209 ".html": 1, // Least common
210 },
211 TotalFiles: 12,
212 }
213
214 topExt := codebase.TopExtensions()
215 if len(topExt) != 5 {
216 t.Errorf("Expected 5 top extensions, got %d", len(topExt))
217 }
218
219 // Check that extensions are sorted by count (descending)
220 expected := []string{
221 ".md: 5 (42%)",
222 ".go: 3 (25%)",
223 ".js: 2 (17%)",
224 ".html: 1 (8%)",
225 ".py: 1 (8%)",
226 }
227
228 for i, expectedExt := range expected {
229 if i >= len(topExt) {
230 t.Errorf("Missing expected extension at index %d: %s", i, expectedExt)
231 continue
232 }
233 if topExt[i] != expectedExt {
234 t.Errorf("Expected extension %q at index %d, got %q", expectedExt, i, topExt[i])
235 }
236 }
237 })
238}