blob: be70ce7312aada509a732af8a9ac8f7f177ed67f [file] [log] [blame]
Marc-Antoine Ruelf1e517d2025-06-08 17:30:37 +00001package onstart
2
3import (
4 "context"
5 "slices"
6 "testing"
7)
8
9func TestAnalyzeCodebase(t *testing.T) {
10 t.Run("Basic Analysis", func(t *testing.T) {
11 // Test basic functionality with regular ASCII filenames
12 codebase, err := AnalyzeCodebase(context.Background(), ".")
13 if err != nil {
14 t.Fatalf("AnalyzeCodebase failed: %v", err)
15 }
16
17 if codebase == nil {
18 t.Fatal("Expected non-nil codebase")
19 }
20
21 if codebase.TotalFiles == 0 {
22 t.Error("Expected some files to be analyzed")
23 }
24
25 if len(codebase.ExtensionCounts) == 0 {
26 t.Error("Expected extension counts to be populated")
27 }
28 })
29
30 t.Run("Non-ASCII Filenames", func(t *testing.T) {
31 // Test with non-ASCII characters in filenames
32 testdataPath := "./testdata"
33 codebase, err := AnalyzeCodebase(context.Background(), testdataPath)
34 if err != nil {
35 t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err)
36 }
37
38 if codebase == nil {
39 t.Fatal("Expected non-nil codebase")
40 }
41
42 // We expect 8 files in our testdata directory
43 expectedFiles := 8
44 if codebase.TotalFiles != expectedFiles {
45 t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles)
46 }
47
48 // Verify extension counts include our non-ASCII files
49 expectedExtensions := map[string]int{
50 ".go": 1, // 测试文件.go
51 ".js": 1, // café.js
52 ".py": 1, // русский.py
53 ".md": 3, // 🚀rocket.md, readme-español.md, claude-한국어.md
54 ".html": 1, // Übung.html
55 "<no-extension>": 1, // Makefile-日本語
56 }
57
58 for ext, expectedCount := range expectedExtensions {
59 actualCount, exists := codebase.ExtensionCounts[ext]
60 if !exists {
61 t.Errorf("Expected extension %s to be found", ext)
62 continue
63 }
64 if actualCount != expectedCount {
65 t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount)
66 }
67 }
68
69 // Verify file categorization works with non-ASCII filenames
70 // Check build files
71 if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") {
72 t.Error("Expected Makefile-日本語 to be categorized as a build file")
73 }
74
75 // Check documentation files
76 if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") {
77 t.Error("Expected readme-español.md to be categorized as a documentation file")
78 }
79
80 // Check guidance files
81 if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") {
82 t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file")
83 }
84 })
85}
86
87func TestCategorizeFile(t *testing.T) {
88 t.Run("Non-ASCII Filenames", func(t *testing.T) {
89 tests := []struct {
90 name string
91 path string
92 expected string
93 }{
94 {"Chinese Go file", "测试文件.go", ""},
95 {"French JS file", "café.js", ""},
96 {"Russian Python file", "русский.py", ""},
97 {"Emoji markdown file", "🚀rocket.md", ""},
98 {"German HTML file", "Übung.html", ""},
99 {"Japanese Makefile", "Makefile-日本語", "build"},
100 {"Spanish README", "readme-español.md", "documentation"},
101 {"Korean Claude file", "subdir/claude.한국어.md", "guidance"},
102 // Test edge cases with Unicode normalization and combining characters
103 {"Mixed Unicode file", "test中文🚀.txt", ""},
104 {"Combining characters", "filé̂.go", ""}, // file with combining acute and circumflex accents
105 {"Right-to-left script", "مرحبا.py", ""}, // Arabic "hello"
106 }
107
108 for _, tt := range tests {
109 t.Run(tt.name, func(t *testing.T) {
110 result := categorizeFile(tt.path)
111 if result != tt.expected {
112 t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
113 }
114 })
115 }
116 })
117}
118
119func TestTopExtensions(t *testing.T) {
120 t.Run("With Non-ASCII Files", func(t *testing.T) {
121 // Create a test codebase with known extension counts
122 codebase := &Codebase{
123 ExtensionCounts: map[string]int{
124 ".md": 5, // Most common
125 ".go": 3,
126 ".js": 2,
127 ".py": 1,
128 ".html": 1, // Least common
129 },
130 TotalFiles: 12,
131 }
132
133 topExt := codebase.TopExtensions()
134 if len(topExt) != 5 {
135 t.Errorf("Expected 5 top extensions, got %d", len(topExt))
136 }
137
138 // Check that extensions are sorted by count (descending)
139 expected := []string{
140 ".md: 5 (42%)",
141 ".go: 3 (25%)",
142 ".js: 2 (17%)",
143 ".html: 1 (8%)",
144 ".py: 1 (8%)",
145 }
146
147 for i, expectedExt := range expected {
148 if i >= len(topExt) {
149 t.Errorf("Missing expected extension at index %d: %s", i, expectedExt)
150 continue
151 }
152 if topExt[i] != expectedExt {
153 t.Errorf("Expected extension %q at index %d, got %q", expectedExt, i, topExt[i])
154 }
155 }
156 })
157}