blob: b3a0becde9451d9f27345d6170e9ab1fb0130d68 [file] [log] [blame]
package onstart
import (
"context"
"os"
"os/exec"
"path/filepath"
"slices"
"testing"
)
// TestAnalyzeCodebase runs AnalyzeCodebase against the current directory and
// against a throwaway git repository whose staged files have non-ASCII names.
func TestAnalyzeCodebase(t *testing.T) {
	t.Run("Basic Analysis", func(t *testing.T) {
		// Test basic functionality with regular ASCII filenames
		codebase, err := AnalyzeCodebase(context.Background(), ".")
		if err != nil {
			t.Fatalf("AnalyzeCodebase failed: %v", err)
		}
		if codebase == nil {
			t.Fatal("Expected non-nil codebase")
		}
		if codebase.TotalFiles == 0 {
			t.Error("Expected some files to be analyzed")
		}
		if len(codebase.ExtensionCounts) == 0 {
			t.Error("Expected extension counts to be populated")
		}
	})

	t.Run("Non-ASCII Filenames", func(t *testing.T) {
		// Create a temporary directory with unicode filenames for testing
		tempDir := t.TempDir()

		// runGit runs a single git command inside tempDir. On failure it
		// aborts the subtest and reports git's own output alongside the
		// error, since the exit status alone rarely explains what went wrong.
		runGit := func(failure string, args ...string) {
			t.Helper()
			cmd := exec.Command("git", args...)
			cmd.Dir = tempDir
			if out, err := cmd.CombinedOutput(); err != nil {
				t.Fatalf("%s: %v\n%s", failure, err, out)
			}
		}

		// Initialize git repository
		runGit("Failed to init git repo", "init")
		runGit("Failed to set git user.name", "config", "user.name", "Test User")
		runGit("Failed to set git user.email", "config", "user.email", "test@example.com")
		// Configure git to handle unicode filenames properly
		runGit("Failed to set git core.quotepath", "config", "core.quotepath", "false")
		runGit("Failed to set git core.precomposeunicode", "config", "core.precomposeunicode", "true")

		// Test files with unicode characters in their names, keyed by
		// slash-separated path relative to tempDir.
		testFiles := map[string]string{
			"测试文件.go":                  "// Package test with Chinese characters in filename\npackage test\n\nfunc TestFunction() {\n\t// This is a test file\n}",
			"café.js":                  "// JavaScript file with French characters\nconsole.log('Hello from café!');",
			"русский.py":               "# Python file with Russian characters\nprint('Привет мир!')",
			"🚀rocket.md":               "# README with Emoji\n\nThis file has an emoji in the filename.",
			"readme-español.md":        "# Spanish README\n\nEste es un archivo de documentación.",
			"Übung.html":               "<!DOCTYPE html>\n<html><head><title>German Exercise</title></head><body><h1>Übung</h1></body></html>",
			"Makefile-日本語":              "# Japanese Makefile\nall:\n\techo 'Japanese makefile'",
			"subdir/claude.한국어.md": "# Korean Claude file\n\nThis is a guidance file with Korean characters.",
		}

		// Write all test files, creating parent directories as needed.
		// MkdirAll is a no-op for directories that already exist.
		for filename, content := range testFiles {
			fullPath := filepath.Join(tempDir, filename)
			dir := filepath.Dir(fullPath)
			if err := os.MkdirAll(dir, 0o755); err != nil {
				t.Fatalf("Failed to create directory %s: %v", dir, err)
			}
			if err := os.WriteFile(fullPath, []byte(content), 0o644); err != nil {
				t.Fatalf("Failed to write file %s: %v", filename, err)
			}
		}

		// Add all files to git at once
		runGit("Failed to add files to git", "add", ".")

		// Test with non-ASCII characters in filenames
		codebase, err := AnalyzeCodebase(context.Background(), tempDir)
		if err != nil {
			t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err)
		}
		if codebase == nil {
			t.Fatal("Expected non-nil codebase")
		}

		// Every file written above should be counted; deriving the number
		// from the fixture map keeps it in sync when files are added.
		expectedFiles := len(testFiles)
		if codebase.TotalFiles != expectedFiles {
			t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles)
		}

		// Verify extension counts include our non-ASCII files
		expectedExtensions := map[string]int{
			".go":            1, // 测试文件.go
			".js":            1, // café.js
			".py":            1, // русский.py
			".md":            3, // 🚀rocket.md, readme-español.md, claude.한국어.md
			".html":          1, // Übung.html
			"<no-extension>": 1, // Makefile-日本語
		}
		for ext, expectedCount := range expectedExtensions {
			actualCount, exists := codebase.ExtensionCounts[ext]
			if !exists {
				t.Errorf("Expected extension %s to be found", ext)
				continue
			}
			if actualCount != expectedCount {
				t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount)
			}
		}

		// Verify file categorization works with non-ASCII filenames
		// Check build files
		if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") {
			t.Error("Expected Makefile-日本語 to be categorized as a build file")
		}
		// Check documentation files
		if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") {
			t.Error("Expected readme-español.md to be categorized as a documentation file")
		}
		// Check guidance files
		if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") {
			t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file")
		}
	})
}
// TestCategorizeFile checks the category string categorizeFile returns for a
// table of paths containing non-ASCII characters. An empty expected category
// means the path should not fall into any category.
func TestCategorizeFile(t *testing.T) {
	t.Run("Non-ASCII Filenames", func(t *testing.T) {
		type testCase struct {
			label string
			input string
			want  string
		}
		cases := []testCase{
			{label: "Chinese Go file", input: "测试文件.go", want: ""},
			{label: "French JS file", input: "café.js", want: ""},
			{label: "Russian Python file", input: "русский.py", want: ""},
			{label: "Emoji markdown file", input: "🚀rocket.md", want: ""},
			{label: "German HTML file", input: "Übung.html", want: ""},
			{label: "Japanese Makefile", input: "Makefile-日本語", want: "build"},
			{label: "Spanish README", input: "readme-español.md", want: "documentation"},
			{label: "Korean Claude file", input: "subdir/claude.한국어.md", want: "guidance"},
			// Edge cases: mixed scripts, combining marks, right-to-left text.
			{label: "Mixed Unicode file", input: "test中文🚀.txt", want: ""},
			{label: "Combining characters", input: "filé̂.go", want: ""}, // combining acute and circumflex accents
			{label: "Right-to-left script", input: "مرحبا.py", want: ""}, // Arabic "hello"
		}

		// One named subtest per row so a failure identifies the offending path.
		for _, tc := range cases {
			t.Run(tc.label, func(t *testing.T) {
				if got := categorizeFile(tc.input); got != tc.want {
					t.Errorf("categorizeFile(%q) = %q, want %q", tc.input, got, tc.want)
				}
			})
		}
	})
}
// TestTopExtensions builds a Codebase with fixed extension counts and checks
// that TopExtensions returns the formatted "ext: count (percent%)" entries in
// the expected order.
func TestTopExtensions(t *testing.T) {
	t.Run("With Non-ASCII Files", func(t *testing.T) {
		// Twelve files spread over five extensions.
		cb := &Codebase{
			TotalFiles: 12,
			ExtensionCounts: map[string]int{
				".md":   5, // highest count
				".go":   3,
				".js":   2,
				".py":   1,
				".html": 1, // tied with .py for lowest count
			},
		}

		got := cb.TopExtensions()
		if n := len(got); n != 5 {
			t.Errorf("Expected 5 top extensions, got %d", n)
		}

		// Descending by count; the two single-file extensions are expected
		// with .html ahead of .py.
		want := []string{
			".md: 5 (42%)",
			".go: 3 (25%)",
			".js: 2 (17%)",
			".html: 1 (8%)",
			".py: 1 (8%)",
		}
		for idx, entry := range want {
			switch {
			case idx >= len(got):
				// Result is shorter than expected; report each missing slot.
				t.Errorf("Missing expected extension at index %d: %s", idx, entry)
			case got[idx] != entry:
				t.Errorf("Expected extension %q at index %d, got %q", entry, idx, got[idx])
			}
		}
	})
}