claudetool: improve codereview
Do a bunch of un-vibecoding and bug fixing.
Unfortunately, lots left.
Get rid of vet; gopls check covers it.
Add testing infrastructure and a bunch of fixtures.
diff --git a/claudetool/codereview_test.go b/claudetool/codereview_test.go
new file mode 100644
index 0000000..71934a2
--- /dev/null
+++ b/claudetool/codereview_test.go
@@ -0,0 +1,290 @@
+package claudetool
+
+import (
+ "bytes"
+ "cmp"
+ "context"
+ "flag"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "slices"
+ "strings"
+ "testing"
+
+ "golang.org/x/tools/txtar"
+)
+
+// updateTests is set to true when the -update flag is used.
+// This will update the expected test results instead of failing tests.
+var updateTests = flag.Bool("update", false, "update expected test results instead of failing tests")
+
+// TestCodereviewDifferential runs all the end-to-end tests for the codereview and differential packages.
+// Each test is defined in a .txtar file in the testdata directory.
+func TestCodereviewDifferential(t *testing.T) {
+ entries, err := os.ReadDir("testdata")
+ if err != nil {
+ t.Fatalf("failed to read testdata directory: %v", err)
+ }
+ for _, entry := range entries {
+ if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".txtar") {
+ continue
+ }
+ testPath := filepath.Join("testdata", entry.Name())
+ testName := strings.TrimSuffix(entry.Name(), ".txtar")
+ t.Run(testName, func(t *testing.T) {
+ t.Parallel()
+ runE2ETest(t, testPath, *updateTests)
+ })
+ }
+}
+
+// runE2ETest executes a single end-to-end test from a .txtar file.
+func runE2ETest(t *testing.T, testPath string, update bool) {
+ orig, err := os.ReadFile(testPath)
+ if err != nil {
+ t.Fatalf("failed to read test file %s: %v", testPath, err)
+ }
+ archive, err := txtar.ParseFile(testPath)
+ if err != nil {
+ t.Fatalf("failed to parse txtar file %s: %v", testPath, err)
+ }
+
+ tmpDir := t.TempDir()
+ // resolve temp dir path so that we can canonicalize/normalize it later
+ tmpDir = resolveRealPath(tmpDir)
+
+ if err := initGoModule(tmpDir); err != nil {
+ t.Fatalf("failed to initialize Go module: %v", err)
+ }
+ if err := initGitRepo(tmpDir); err != nil {
+ t.Fatalf("failed to initialize git repository: %v", err)
+ }
+ if err := processTestFiles(t, archive, tmpDir, update); err != nil {
+ t.Fatalf("error processing test files: %v", err)
+ }
+
+ // If we're updating, write back the modified archive to the file
+ if update {
+ updatedContent := txtar.Format(archive)
+ // only write back changes, avoids git status churn
+ if !bytes.Equal(orig, updatedContent) {
+ if err := os.WriteFile(testPath, updatedContent, 0o644); err != nil {
+ t.Errorf("Failed to update test file %s: %v", testPath, err)
+ }
+ }
+ }
+}
+
+func gitCommitEnv() []string {
+ return append(os.Environ(),
+ "GIT_AUTHOR_NAME=Test Author",
+ "GIT_AUTHOR_EMAIL=test@example.com",
+ "GIT_COMMITTER_NAME=Test Committer",
+ "GIT_COMMITTER_EMAIL=test@example.com",
+ "GIT_AUTHOR_DATE=2025-01-01T00:00:00Z",
+ "GIT_COMMITTER_DATE=2025-01-01T00:00:00Z",
+ )
+}
+
+// initGitRepo initializes a new git repository in the specified directory.
+func initGitRepo(dir string) error {
+ cmd := exec.Command("git", "init")
+ cmd.Dir = dir
+ err := cmd.Run()
+ if err != nil {
+ return fmt.Errorf("error initializing git repository: %w", err)
+ }
+ // create a single commit out of everything there now
+ cmd = exec.Command("git", "add", ".")
+ cmd.Dir = dir
+ err = cmd.Run()
+ if err != nil {
+ return fmt.Errorf("error staging files: %w", err)
+ }
+ cmd = exec.Command("git", "commit", "-m", "create repo")
+ cmd.Dir = dir
+ cmd.Env = gitCommitEnv()
+ err = cmd.Run()
+ if err != nil {
+ return fmt.Errorf("error making initial commit: %w", err)
+ }
+ return nil
+}
+
+// processTestFiles processes the files in the txtar archive in sequence.
+func processTestFiles(t *testing.T, archive *txtar.Archive, dir string, update bool) error {
+ var initialCommit string
+ filesForNextCommit := make(map[string]bool)
+
+ for i, file := range archive.Files {
+ switch file.Name {
+ case ".commit":
+ commit, err := makeGitCommit(dir, string(file.Data), filesForNextCommit)
+ if err != nil {
+ return fmt.Errorf("error making git commit: %w", err)
+ }
+ clear(filesForNextCommit)
+ initialCommit = cmp.Or(initialCommit, commit)
+ // fmt.Println("initial commit:", initialCommit)
+ // cmd := exec.Command("git", "log")
+ // cmd.Dir = dir
+ // cmd.Stdout = os.Stdout
+ // cmd.Run()
+
+ case ".run_test":
+ got, err := runDifferentialTest(dir, initialCommit)
+ if err != nil {
+ return fmt.Errorf("error running differential test: %w", err)
+ }
+ want := string(file.Data)
+
+ if update {
+ archive.Files[i].Data = []byte(got)
+ break
+ }
+ if strings.TrimSpace(want) != strings.TrimSpace(got) {
+ t.Errorf("Results don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+ }
+
+ case ".run_autoformat":
+ if initialCommit == "" {
+ return fmt.Errorf("initial commit not set, cannot run autoformat")
+ }
+
+ got, err := runAutoformat(dir, initialCommit)
+ if err != nil {
+ return fmt.Errorf("error running autoformat: %w", err)
+ }
+ slices.Sort(got)
+
+ if update {
+ correct := strings.Join(got, "\n") + "\n"
+ archive.Files[i].Data = []byte(correct)
+ break
+ }
+
+ want := strings.Split(strings.TrimSpace(string(file.Data)), "\n")
+ if !slices.Equal(want, got) {
+ t.Errorf("Formatted files don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+ }
+
+ default:
+ filePath := filepath.Join(dir, file.Name)
+ if err := os.MkdirAll(filepath.Dir(filePath), 0o700); err != nil {
+ return fmt.Errorf("error creating directory for %s: %w", file.Name, err)
+ }
+ data := file.Data
+ // Remove second trailing newline if present.
+ // An annoyance of the txtar format, messes with gofmt.
+ if bytes.HasSuffix(data, []byte("\n\n")) {
+ data = bytes.TrimSuffix(data, []byte("\n"))
+ }
+ if err := os.WriteFile(filePath, file.Data, 0o600); err != nil {
+ return fmt.Errorf("error writing file %s: %w", file.Name, err)
+ }
+ filesForNextCommit[file.Name] = true
+ }
+ }
+
+ return nil
+}
+
+// makeGitCommit commits the specified files with the given message.
+// Returns the commit hash.
+func makeGitCommit(dir, message string, files map[string]bool) (string, error) {
+ for file := range files {
+ cmd := exec.Command("git", "add", file)
+ cmd.Dir = dir
+ if err := cmd.Run(); err != nil {
+ return "", fmt.Errorf("error staging file %s: %w", file, err)
+ }
+ }
+ message = cmp.Or(message, "Test commit")
+
+ // Make the commit with fixed author, committer, and date for stable hashes
+ cmd := exec.Command("git", "commit", "--allow-empty", "-m", message)
+ cmd.Dir = dir
+ cmd.Env = gitCommitEnv()
+ if err := cmd.Run(); err != nil {
+ return "", fmt.Errorf("error making commit: %w", err)
+ }
+
+ // Get the commit hash
+ cmd = exec.Command("git", "rev-parse", "HEAD")
+ cmd.Dir = dir
+ out, err := cmd.Output()
+ if err != nil {
+ return "", fmt.Errorf("error getting commit hash: %w", err)
+ }
+
+ return strings.TrimSpace(string(out)), nil
+}
+
+// runDifferentialTest runs the code review tool on the repository and returns the result.
+func runDifferentialTest(dir, initialCommit string) (string, error) {
+ if initialCommit == "" {
+ return "", fmt.Errorf("initial commit not set, cannot run differential test")
+ }
+
+ // Create a code reviewer for the repository
+ ctx := context.Background()
+ reviewer, err := NewCodeReviewer(ctx, dir, initialCommit)
+ if err != nil {
+ return "", fmt.Errorf("error creating code reviewer: %w", err)
+ }
+
+ // Run the code review
+ result, err := reviewer.Run(ctx, nil)
+ if err != nil {
+ return "", fmt.Errorf("error running code review: %w", err)
+ }
+
+ // Normalize paths in the result
+ normalized := normalizePaths(result, dir)
+ return normalized, nil
+}
+
+// normalizePaths replaces the temp directory paths with a standard placeholder
+func normalizePaths(result string, tempDir string) string {
+ return strings.ReplaceAll(result, tempDir, "/PATH/TO/REPO")
+}
+
+// initGoModule initializes a Go module in the specified directory.
+func initGoModule(dir string) error {
+ cmd := exec.Command("go", "mod", "init", "sketch.dev")
+ cmd.Dir = dir
+ return cmd.Run()
+}
+
+// runAutoformat runs the autoformat function on the repository and returns the list of formatted files.
+func runAutoformat(dir, initialCommit string) ([]string, error) {
+ if initialCommit == "" {
+ return nil, fmt.Errorf("initial commit not set, cannot run autoformat")
+ }
+ ctx := context.Background()
+ reviewer, err := NewCodeReviewer(ctx, dir, initialCommit)
+ if err != nil {
+ return nil, fmt.Errorf("error creating code reviewer: %w", err)
+ }
+ formattedFiles := reviewer.Autoformat(ctx)
+ normalizedFiles := make([]string, len(formattedFiles))
+ for i, file := range formattedFiles {
+ normalizedFiles[i] = normalizePaths(file, dir)
+ }
+ slices.Sort(normalizedFiles)
+ return normalizedFiles, nil
+}
+
+// resolveRealPath follows symlinks and returns the real path
+// This handles platform-specific behaviors like macOS's /private prefix
+func resolveRealPath(path string) string {
+ // Follow symlinks to get the real path
+ realPath, err := filepath.EvalSymlinks(path)
+ if err != nil {
+ // If we can't resolve symlinks, just return the original path
+ return path
+ }
+ return realPath
+}
diff --git a/claudetool/differential.go b/claudetool/differential.go
index 14dce04..014fc13 100644
--- a/claudetool/differential.go
+++ b/claudetool/differential.go
@@ -6,6 +6,7 @@
"encoding/json"
"fmt"
"io"
+ "io/fs"
"log/slog"
"maps"
"os"
@@ -69,14 +70,13 @@
// The packages in the initial commit may be different.
// Good enough for now.
// TODO: do better
- directPkgs, allPkgs, err := r.packagesForFiles(ctx, changedFiles)
+ allPkgs, err := r.packagesForFiles(ctx, changedFiles)
if err != nil {
// TODO: log and skip to stuff that doesn't require packages
slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
return "", err
}
allPkgList := slices.Collect(maps.Keys(allPkgs))
- directPkgList := slices.Collect(maps.Keys(directPkgs))
var msgs []string
@@ -89,16 +89,7 @@
msgs = append(msgs, testMsg)
}
- vetMsg, err := r.checkVet(ctx, directPkgList)
- if err != nil {
- slog.DebugContext(ctx, "CodeReviewer.Run: failed to check vet", "err", err)
- return "", err
- }
- if vetMsg != "" {
- msgs = append(msgs, vetMsg)
- }
-
- goplsMsg, err := r.checkGopls(ctx, changedFiles)
+ goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
if err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
return "", err
@@ -111,6 +102,8 @@
slog.DebugContext(ctx, "CodeReviewer.Run: no issues found")
return "OK", nil
}
+
+ msgs = append(msgs, "Please fix before proceeding.")
slog.DebugContext(ctx, "CodeReviewer.Run: found issues", "issues", msgs)
return strings.Join(msgs, "\n\n"), nil
}
@@ -139,10 +132,9 @@
afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
afterTestCmd.Dir = r.repoRoot
- afterTestOut, afterTestErr := afterTestCmd.Output()
- if afterTestErr == nil {
- return "", nil // all tests pass, we're good!
- }
+ afterTestOut, _ := afterTestCmd.Output()
+ // unfortunately, we can't short-circuit here even if all tests pass,
+ // because we need to check for skipped tests.
err := r.initializeInitialCommitWorktree(ctx)
if err != nil {
@@ -162,7 +154,6 @@
if afterParseErr != nil {
return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
}
-
testRegressions, err := r.compareTestResults(beforeResults, afterResults)
if err != nil {
return "", fmt.Errorf("failed to compare test results: %w", err)
@@ -172,281 +163,6 @@
return res, nil
}
-// VetIssue represents a single issue found by go vet
-type VetIssue struct {
- Position string `json:"posn"`
- Message string `json:"message"`
- // Ignoring suggested_fixes for now as we don't need them for comparison
-}
-
-// VetResult represents the JSON output of go vet -json for a single package
-type VetResult map[string][]VetIssue // category -> issues
-
-// VetResults represents the full JSON output of go vet -json
-type VetResults map[string]VetResult // package path -> result
-
-// checkVet runs go vet on the provided packages in both the current and initial state,
-// compares the results, and reports any new vet issues introduced in the current state.
-func (r *CodeReviewer) checkVet(ctx context.Context, pkgList []string) (string, error) {
- if len(pkgList) == 0 {
- return "", nil // no packages to check
- }
-
- // Run vet on the current state with JSON output
- goVetArgs := []string{"vet", "-json"}
- goVetArgs = append(goVetArgs, pkgList...)
-
- afterVetCmd := exec.CommandContext(ctx, "go", goVetArgs...)
- afterVetCmd.Dir = r.repoRoot
- afterVetOut, afterVetErr := afterVetCmd.CombinedOutput() // ignore error, we'll parse the output regar
- if afterVetErr != nil {
- slog.WarnContext(ctx, "CodeReviewer.checkVet: (after) go vet failed", "err", afterVetErr, "output", string(afterVetOut))
- return "", nil // nothing more we can do here
- }
-
- // Parse the JSON output (even if vet returned an error, as it does when issues are found)
- afterVetResults, err := parseVetJSON(afterVetOut)
- if err != nil {
- return "", fmt.Errorf("failed to parse vet output for current state: %w", err)
- }
-
- // If no issues were found, we're done
- if len(afterVetResults) == 0 || !vetResultsHaveIssues(afterVetResults) {
- return "", nil
- }
-
- // Vet detected issues in the current state, check if they existed in the initial state
- err = r.initializeInitialCommitWorktree(ctx)
- if err != nil {
- return "", err
- }
-
- beforeVetCmd := exec.CommandContext(ctx, "go", goVetArgs...)
- beforeVetCmd.Dir = r.initialWorktree
- beforeVetOut, _ := beforeVetCmd.CombinedOutput() // ignore error, we'll parse the output anyway
-
- // Parse the JSON output for the initial state
- beforeVetResults, err := parseVetJSON(beforeVetOut)
- if err != nil {
- return "", fmt.Errorf("failed to parse vet output for initial state: %w", err)
- }
-
- // Find new issues that weren't present in the initial state
- vetRegressions := findVetRegressions(beforeVetResults, afterVetResults)
- if !vetResultsHaveIssues(vetRegressions) {
- return "", nil // no new issues
- }
-
- // Format the results
- return formatVetRegressions(vetRegressions), nil
-}
-
-// parseVetJSON parses the JSON output from go vet -json
-func parseVetJSON(output []byte) (VetResults, error) {
- // The output contains multiple JSON objects, one per package
- // We need to parse them separately
- results := make(VetResults)
-
- // Process the output by collecting JSON chunks between # comment lines
- lines := strings.Split(string(output), "\n")
- currentChunk := strings.Builder{}
-
- // Helper function to process accumulated JSON chunks
- processChunk := func() {
- chunk := strings.TrimSpace(currentChunk.String())
- if chunk == "" || !strings.HasPrefix(chunk, "{") {
- return // Skip empty chunks or non-JSON chunks
- }
-
- // Try to parse the chunk as JSON
- var result VetResults
- if err := json.Unmarshal([]byte(chunk), &result); err != nil {
- return // Skip invalid JSON
- }
-
- // Merge with our results
- for pkg, issues := range result {
- results[pkg] = issues
- }
-
- // Reset the chunk builder
- currentChunk.Reset()
- }
-
- // Process lines
- for _, line := range lines {
- // If we hit a comment line, process the previous chunk and start a new one
- if strings.HasPrefix(strings.TrimSpace(line), "#") {
- processChunk()
- continue
- }
-
- // Add the line to the current chunk
- currentChunk.WriteString(line)
- currentChunk.WriteString("\n")
- }
-
- // Process the final chunk
- processChunk()
-
- return results, nil
-}
-
-// vetResultsHaveIssues checks if there are any actual issues in the vet results
-func vetResultsHaveIssues(results VetResults) bool {
- for _, pkgResult := range results {
- for _, issues := range pkgResult {
- if len(issues) > 0 {
- return true
- }
- }
- }
- return false
-}
-
-// findVetRegressions identifies vet issues that are new in the after state
-func findVetRegressions(before, after VetResults) VetResults {
- regressions := make(VetResults)
-
- // Go through all packages in the after state
- for pkgPath, afterPkgResults := range after {
- beforePkgResults, pkgExistedBefore := before[pkgPath]
-
- // Initialize package in regressions if it has issues
- if !pkgExistedBefore {
- // If the package didn't exist before, all issues are new
- regressions[pkgPath] = afterPkgResults
- continue
- }
-
- // Compare issues by category
- for category, afterIssues := range afterPkgResults {
- beforeIssues, categoryExistedBefore := beforePkgResults[category]
-
- if !categoryExistedBefore {
- // If this category didn't exist before, all issues are new
- if regressions[pkgPath] == nil {
- regressions[pkgPath] = make(VetResult)
- }
- regressions[pkgPath][category] = afterIssues
- continue
- }
-
- // Compare individual issues
- var newIssues []VetIssue
- for _, afterIssue := range afterIssues {
- if !issueExistsIn(afterIssue, beforeIssues) {
- newIssues = append(newIssues, afterIssue)
- }
- }
-
- // Add new issues to regressions
- if len(newIssues) > 0 {
- if regressions[pkgPath] == nil {
- regressions[pkgPath] = make(VetResult)
- }
- regressions[pkgPath][category] = newIssues
- }
- }
- }
-
- return regressions
-}
-
-// issueExistsIn checks if an issue already exists in a list of issues
-// using a looser comparison that's resilient to position changes
-func issueExistsIn(issue VetIssue, issues []VetIssue) bool {
- issueFile := extractFilePath(issue.Position)
-
- for _, existing := range issues {
- // Main comparison is by message content, which is likely stable
- if issue.Message == existing.Message {
- // If messages match exactly, consider it the same issue even if position changed
- return true
- }
-
- // As a secondary check, if the issue is in the same file and has similar message,
- // it's likely the same issue that might have been slightly reworded or relocated
- existingFile := extractFilePath(existing.Position)
- if issueFile == existingFile && messagesSimilar(issue.Message, existing.Message) {
- return true
- }
- }
- return false
-}
-
-// extractFilePath gets just the file path from a position string like "/path/to/file.go:10:15"
-func extractFilePath(position string) string {
- parts := strings.Split(position, ":")
- if len(parts) >= 1 {
- return parts[0]
- }
- return position // fallback to the full position if we can't parse it
-}
-
-// messagesSimilar checks if two messages are similar enough to be considered the same issue
-// This is a simple implementation that could be enhanced with more sophisticated text comparison
-func messagesSimilar(msg1, msg2 string) bool {
- // For now, simple similarity check: if one is a substring of the other
- return strings.Contains(msg1, msg2) || strings.Contains(msg2, msg1)
-}
-
-// formatVetRegressions generates a human-readable summary of vet regressions
-func formatVetRegressions(regressions VetResults) string {
- if !vetResultsHaveIssues(regressions) {
- return ""
- }
-
- var sb strings.Builder
- sb.WriteString("Go vet issues detected:\n\n")
-
- // Get sorted list of packages for deterministic output
- pkgPaths := make([]string, 0, len(regressions))
- for pkgPath := range regressions {
- pkgPaths = append(pkgPaths, pkgPath)
- }
- slices.Sort(pkgPaths)
-
- issueCount := 1
- for _, pkgPath := range pkgPaths {
- pkgResult := regressions[pkgPath]
-
- // Get sorted list of categories
- categories := make([]string, 0, len(pkgResult))
- for category := range pkgResult {
- categories = append(categories, category)
- }
- slices.Sort(categories)
-
- for _, category := range categories {
- issues := pkgResult[category]
-
- // Skip empty issue lists (shouldn't happen, but just in case)
- if len(issues) == 0 {
- continue
- }
-
- // Sort issues by position for deterministic output
- slices.SortFunc(issues, func(a, b VetIssue) int {
- return strings.Compare(a.Position, b.Position)
- })
-
- // Format each issue
- for _, issue := range issues {
- sb.WriteString(fmt.Sprintf("%d. [%s] %s: %s\n",
- issueCount,
- category,
- issue.Position,
- issue.Message))
- issueCount++
- }
- }
- }
-
- sb.WriteString("\nPlease fix these issues before proceeding.")
- return sb.String()
-}
-
// GoplsIssue represents a single issue reported by gopls check
type GoplsIssue struct {
Position string // File position in format "file:line:col-range"
@@ -496,7 +212,7 @@
}
// Parse the output
- afterIssues := parseGoplsOutput(afterGoplsOut)
+ afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
// If no issues were found, we're done
if len(afterIssues) == 0 {
@@ -530,22 +246,18 @@
}
// Run gopls check on the files that existed in the initial commit
- beforeIssues := []GoplsIssue{}
+ var beforeIssues []GoplsIssue
if len(initialFilesToCheck) > 0 {
beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
beforeGoplsCmd.Dir = r.initialWorktree
- var beforeGoplsOut []byte
- var beforeCmdErr error
- beforeGoplsOut, beforeCmdErr = beforeGoplsCmd.CombinedOutput()
+ beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
// If gopls fails to run properly on the initial commit, log a warning and continue
// with empty before issues - this will be conservative and report more issues
- slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit",
- "err", err, "output", string(beforeGoplsOut))
- // Continue with empty beforeIssues
+ slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
} else {
- beforeIssues = parseGoplsOutput(beforeGoplsOut)
+ beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
}
}
@@ -556,12 +268,12 @@
}
// Format the results
- return formatGoplsRegressions(goplsRegressions), nil
+ return r.formatGoplsRegressions(goplsRegressions), nil
}
// parseGoplsOutput parses the text output from gopls check
// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
-func parseGoplsOutput(output []byte) []GoplsIssue {
+func parseGoplsOutput(root string, output []byte) []GoplsIssue {
var issues []GoplsIssue
lines := strings.Split(string(output), "\n")
@@ -600,6 +312,10 @@
// Extract position and message
position := line[:messageStart]
+ rel, err := filepath.Rel(root, position)
+ if err == nil {
+ position = rel
+ }
message := line[messageStart+2:] // Skip the ': ' separator
// Verify position has the expected format (at least 2 colons for line:col)
@@ -717,7 +433,7 @@
}
// formatGoplsRegressions generates a human-readable summary of gopls check regressions
-func formatGoplsRegressions(regressions []GoplsIssue) string {
+func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
if len(regressions) == 0 {
return ""
}
@@ -727,11 +443,11 @@
// Format each issue
for i, issue := range regressions {
- sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, issue.Position, issue.Message))
+ sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
}
- sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. ")
- sb.WriteString("Do not change existing code that was not part of your current edits.")
+ sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
+ sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
return sb.String()
}
@@ -748,10 +464,10 @@
// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
// It may include false positives.
// Files must be absolute paths!
-func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (directPkgs, allPkgs map[string]*packages.Package, err error) {
+func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
for _, f := range files {
if !filepath.IsAbs(f) {
- return nil, nil, fmt.Errorf("path %q is not absolute", f)
+ return nil, fmt.Errorf("path %q is not absolute", f)
}
}
cfg := &packages.Config{
@@ -767,14 +483,12 @@
}
universe, err := packages.Load(cfg, "./...")
if err != nil {
- return nil, nil, err
+ return nil, err
}
// Identify packages that directly contain the changed files
- directPkgs = make(map[string]*packages.Package) // import path -> package
+ directPkgs := make(map[string]*packages.Package) // import path -> package
for _, pkg := range universe {
- // fmt.Println("pkg:", pkg.PkgPath)
pkgFiles := allFiles(pkg)
- // fmt.Println("pkgFiles:", pkgFiles)
for _, file := range files {
if pkgFiles[file] {
// prefer test packages, as they contain strictly more files (right?)
@@ -786,27 +500,35 @@
}
}
- // Create a copy of directPkgs to expand with dependencies
- allPkgs = make(map[string]*packages.Package)
- for k, v := range directPkgs {
- allPkgs[k] = v
- }
+ allPkgs = maps.Clone(directPkgs)
// Add packages that depend on the direct packages
addDependentPackages(universe, allPkgs)
- return directPkgs, allPkgs, nil
+ return allPkgs, nil
}
// allFiles returns all files that might be referenced by the package.
// It may contain false positives.
func allFiles(p *packages.Package) map[string]bool {
files := make(map[string]bool)
+ // Add files from package info
add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
for _, extra := range add {
for _, file := range extra {
files[file] = true
}
}
+ // Add files from testdata directory
+ testdataDir := filepath.Join(p.Dir, "testdata")
+ if _, err := os.Stat(testdataDir); err == nil {
+ fsys := os.DirFS(p.Dir)
+ fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
+ if err == nil && !d.IsDir() {
+ files[filepath.Join(p.Dir, path)] = true
+ }
+ return nil
+ })
+ }
return files
}
@@ -816,6 +538,10 @@
for {
changed := false
for _, p := range universe {
+ if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
+ // skip test packages
+ continue
+ }
if _, ok := pkgs[p.PkgPath]; ok {
// already in pkgs
continue
@@ -910,146 +636,151 @@
// testStatus represents the status of a test in a given commit
type testStatus int
+//go:generate go tool stringer -type=testStatus -trimprefix=testStatus
const (
testStatusUnknown testStatus = iota
testStatusPass
testStatusFail
testStatusBuildFail
testStatusSkip
+ testStatusNoTests // no tests exist for this package
)
-// testInfo represents information about a specific test
-type testInfo struct {
- Package string
- Test string // empty for package tests
-}
-
-// String returns a human-readable string representation of the test
-func (t testInfo) String() string {
- if t.Test == "" {
- return t.Package
- }
- return fmt.Sprintf("%s.%s", t.Package, t.Test)
-}
-
// testRegression represents a test that regressed between commits
type testRegression struct {
- Info testInfo
+ Package string
+ Test string // empty for package tests
BeforeStatus testStatus
AfterStatus testStatus
Output string // failure output in the after state
}
-// collectTestStatuses processes a slice of test events and returns a map of test statuses
-func collectTestStatuses(results []testJSON) map[testInfo]testStatus {
- statuses := make(map[testInfo]testStatus)
- failedBuilds := make(map[string]bool) // track packages with build failures
- testOutputs := make(map[testInfo][]string) // collect output for failing tests
-
- // First pass: identify build failures
- for _, result := range results {
- if result.Action == "fail" && result.FailedBuild != "" {
- failedBuilds[result.FailedBuild] = true
- }
+func (r *testRegression) Source() string {
+ if r.Test == "" {
+ return r.Package
}
+ return fmt.Sprintf("%s.%s", r.Package, r.Test)
+}
- // Second pass: collect test statuses
- for _, result := range results {
- info := testInfo{Package: result.Package, Test: result.Test}
+type packageResult struct {
+ Status testStatus // overall status for the package
+ TestStatus map[string]testStatus // name -> status
+ TestOutput map[string][]string // name -> output parts
+}
- // Skip output events for now, we'll process them in a separate pass
- if result.Action == "output" {
- if result.Test != "" { // only collect output for actual tests, not package messages
- testOutputs[info] = append(testOutputs[info], result.Output)
- }
- continue
+// collectTestStatuses processes a slice of test events and returns rich status information
+func collectTestStatuses(results []testJSON) map[string]*packageResult {
+ m := make(map[string]*packageResult)
+
+ for _, event := range results {
+ pkg := event.Package
+ p, ok := m[pkg]
+ if !ok {
+ p = new(packageResult)
+ p.TestStatus = make(map[string]testStatus)
+ p.TestOutput = make(map[string][]string)
+ m[pkg] = p
}
- // Handle BuildEvent output
- if result.Action == "build-fail" {
- // Mark all tests in this package as build failures
- for ti := range statuses {
- if ti.Package == result.ImportPath {
- statuses[ti] = testStatusBuildFail
- }
- }
+ switch event.Action {
+ case "output":
+ p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
continue
- }
-
- // Check if the package has a build failure
- if _, hasBuildFailure := failedBuilds[result.Package]; hasBuildFailure {
- statuses[info] = testStatusBuildFail
- continue
- }
-
- // Handle test events
- switch result.Action {
case "pass":
- statuses[info] = testStatusPass
+ if event.Test == "" {
+ p.Status = testStatusPass
+ } else {
+ p.TestStatus[event.Test] = testStatusPass
+ }
case "fail":
- statuses[info] = testStatusFail
+ if event.Test == "" {
+ if event.FailedBuild != "" {
+ p.Status = testStatusBuildFail
+ } else {
+ p.Status = testStatusFail
+ }
+ } else {
+ p.TestStatus[event.Test] = testStatusFail
+ }
case "skip":
- statuses[info] = testStatusSkip
+ if event.Test == "" {
+ p.Status = testStatusNoTests
+ } else {
+ p.TestStatus[event.Test] = testStatusSkip
+ }
}
}
- return statuses
+ return m
}
// compareTestResults identifies tests that have regressed between commits
func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
- beforeStatuses := collectTestStatuses(beforeResults)
- afterStatuses := collectTestStatuses(afterResults)
+ before := collectTestStatuses(beforeResults)
+ after := collectTestStatuses(afterResults)
+ var testLevelRegressions []testRegression
+ var packageLevelRegressions []testRegression
- // Collect output for failing tests
- testOutputMap := make(map[testInfo]string)
- for _, result := range afterResults {
- if result.Action == "output" {
- info := testInfo{Package: result.Package, Test: result.Test}
- testOutputMap[info] += result.Output
+ afterPkgs := slices.Sorted(maps.Keys(after))
+ for _, pkg := range afterPkgs {
+ afterResult := after[pkg]
+ afterStatus := afterResult.Status
+ // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
+ beforeResult, ok := before[pkg]
+ beforeStatus := testStatusNoTests
+ if ok {
+ beforeStatus = beforeResult.Status
+ }
+ // If things no longer build, stop at the package level.
+ // Otherwise, proceed to the test level, so we have more precise information.
+ if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
+ packageLevelRegressions = append(packageLevelRegressions, testRegression{
+ Package: pkg,
+ BeforeStatus: beforeStatus,
+ AfterStatus: afterStatus,
+ })
+ continue
+ }
+ tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
+ for _, test := range tests {
+ afterStatus := afterResult.TestStatus[test]
+ switch afterStatus {
+ case testStatusPass:
+ continue
+ case testStatusUnknown:
+ slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
+ continue
+ }
+ beforeStatus := beforeResult.TestStatus[test]
+ if isRegression(beforeStatus, afterStatus) {
+ testLevelRegressions = append(testLevelRegressions, testRegression{
+ Package: pkg,
+ Test: test,
+ BeforeStatus: beforeStatus,
+ AfterStatus: afterStatus,
+ Output: strings.Join(afterResult.TestOutput[test], ""),
+ })
+ }
}
}
+ // If we have test-level regressions, report only those
+ // Otherwise, report package-level regressions
var regressions []testRegression
-
- // Look for tests that regressed
- for info, afterStatus := range afterStatuses {
- // Skip tests that are passing or skipped in the after state
- if afterStatus == testStatusPass || afterStatus == testStatusSkip {
- continue
- }
-
- // Get the before status (default to unknown if not present)
- beforeStatus, exists := beforeStatuses[info]
- if !exists {
- beforeStatus = testStatusUnknown
- }
-
- // Log warning if we encounter unexpected unknown status in the 'after' state
- if afterStatus == testStatusUnknown {
- slog.WarnContext(context.Background(), "Unexpected unknown test status encountered",
- "package", info.Package, "test", info.Test)
- }
-
- // Check for regressions
- if isRegression(beforeStatus, afterStatus) {
- regressions = append(regressions, testRegression{
- Info: info,
- BeforeStatus: beforeStatus,
- AfterStatus: afterStatus,
- Output: testOutputMap[info],
- })
- }
+ if len(testLevelRegressions) > 0 {
+ regressions = testLevelRegressions
+ } else {
+ regressions = packageLevelRegressions
}
// Sort regressions for consistent output
slices.SortFunc(regressions, func(a, b testRegression) int {
// First by package
- if c := strings.Compare(a.Info.Package, b.Info.Package); c != 0 {
+ if c := strings.Compare(a.Package, b.Package); c != 0 {
return c
}
// Then by test name
- return strings.Compare(a.Info.Test, b.Info.Test)
+ return strings.Compare(a.Test, b.Test)
})
return regressions, nil
@@ -1058,13 +789,34 @@
// badnessLevels maps test status to a badness level
// Higher values indicate worse status (more severe issues)
var badnessLevels = map[testStatus]int{
- testStatusBuildFail: 4, // Worst
- testStatusFail: 3,
- testStatusSkip: 2,
+ testStatusBuildFail: 5, // Worst
+ testStatusFail: 4,
+ testStatusSkip: 3,
+ testStatusNoTests: 2,
testStatusPass: 1,
testStatusUnknown: 0, // Least bad - avoids false positives
}
+// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
+type regressionKey struct {
+ before, after testStatus
+}
+
+var regressionMessages = map[regressionKey]string{
+ {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
+ {testStatusUnknown, testStatusFail}: "New test is failing",
+ {testStatusUnknown, testStatusSkip}: "New test is skipped",
+ {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
+ {testStatusPass, testStatusFail}: "Was passing, now failing",
+ {testStatusPass, testStatusSkip}: "Was passing, now skipped",
+ {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
+ {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
+ {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
+ {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
+ {testStatusSkip, testStatusFail}: "Was skipped, now failing",
+ {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
+}
+
// isRegression determines if a test has regressed based on before and after status
// A regression is defined as an increase in badness level
func isRegression(before, after testStatus) bool {
@@ -1078,48 +830,18 @@
return ""
}
- var sb strings.Builder
- sb.WriteString(fmt.Sprintf("Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit))
+ buf := new(strings.Builder)
+ fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
for i, reg := range regressions {
- // Describe the regression
- sb.WriteString(fmt.Sprintf("%d. %s: ", i+1, reg.Info.String()))
-
- switch {
- case reg.BeforeStatus == testStatusUnknown && reg.AfterStatus == testStatusFail:
- sb.WriteString("New test is failing")
- case reg.BeforeStatus == testStatusUnknown && reg.AfterStatus == testStatusBuildFail:
- sb.WriteString("New test has build errors")
- case reg.BeforeStatus == testStatusPass && reg.AfterStatus == testStatusFail:
- sb.WriteString("Was passing, now failing")
- case reg.BeforeStatus == testStatusPass && reg.AfterStatus == testStatusBuildFail:
- sb.WriteString("Was passing, now has build errors")
- case reg.BeforeStatus == testStatusSkip && reg.AfterStatus == testStatusFail:
- sb.WriteString("Was skipped, now failing")
- case reg.BeforeStatus == testStatusSkip && reg.AfterStatus == testStatusBuildFail:
- sb.WriteString("Was skipped, now has build errors")
- default:
- sb.WriteString("Regression detected")
+ fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
+ key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
+ message, exists := regressionMessages[key]
+ if !exists {
+ message = "Regression detected"
}
- sb.WriteString("\n")
-
- // Add failure output with indentation for readability
- if reg.Output != "" {
- outputLines := strings.Split(strings.TrimSpace(reg.Output), "\n")
- // Limit output to first 10 lines to avoid overwhelming feedback
- shownLines := min(len(outputLines), 10)
-
- sb.WriteString(" Output:\n")
- for _, line := range outputLines[:shownLines] {
- sb.WriteString(fmt.Sprintf(" | %s\n", line))
- }
- if shownLines < len(outputLines) {
- sb.WriteString(fmt.Sprintf(" | ... (%d more lines)\n", len(outputLines)-shownLines))
- }
- }
- sb.WriteString("\n")
+ fmt.Fprintf(buf, "%s\n", message)
}
- sb.WriteString("Please fix these test failures before proceeding.")
- return sb.String()
+ return buf.String()
}
diff --git a/claudetool/testdata/add_skipped_test.txtar b/claudetool/testdata/add_skipped_test.txtar
new file mode 100644
index 0000000..95000cc
--- /dev/null
+++ b/claudetool/testdata/add_skipped_test.txtar
@@ -0,0 +1,27 @@
+Adding a skipped test is a regression
+
+-- p_test.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.SkipNow()
+}
+
+-- .commit --
+Add skipped test
+
+-- .run_test --
+Test regressions detected between initial commit (2cdfdc5009e364add9a263909472f85eb08480ed) and HEAD:
+
+1: sketch.dev.TestP: New test is skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/basic_gofmt.txtar b/claudetool/testdata/basic_gofmt.txtar
new file mode 100644
index 0000000..c32ebe9
--- /dev/null
+++ b/claudetool/testdata/basic_gofmt.txtar
@@ -0,0 +1,13 @@
+Test autoformatting happens
+
+-- .commit --
+Initial commit
+
+-- p.go --
+ package p
+
+-- .commit --
+Add non-gofmt file
+
+-- .run_autoformat --
+/PATH/TO/REPO/p.go
diff --git a/claudetool/testdata/empty_testdir_add_file.txtar b/claudetool/testdata/empty_testdir_add_file.txtar
new file mode 100644
index 0000000..b754f3d
--- /dev/null
+++ b/claudetool/testdata/empty_testdir_add_file.txtar
@@ -0,0 +1,37 @@
+Test adding a non-empty file to testdata when test expects empty files
+
+-- p_test.go --
+package p
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+func TestEmptyTestdata(t *testing.T) {
+ files, _ := filepath.Glob("testdata/*")
+ for _, path := range files {
+ data, _ := os.ReadFile(path)
+ if len(data) > 0 {
+ t.Fatalf("testdata file is not empty: %s", path)
+ }
+ }
+}
+
+-- testdata/empty --
+-- .commit --
+Initial commit with empty testdata file
+
+-- testdata/nonempty --
+hi
+-- .commit --
+Add non-empty file to testdata
+
+-- .run_test --
+Test regressions detected between initial commit (cdc9ec6dfb8669c11d8aa0df49c72112627784dc) and HEAD:
+
+1: sketch.dev.TestEmptyTestdata: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/failing_to_failing.txtar b/claudetool/testdata/failing_to_failing.txtar
new file mode 100644
index 0000000..eea1b59
--- /dev/null
+++ b/claudetool/testdata/failing_to_failing.txtar
@@ -0,0 +1,28 @@
+Going from failing tests to failing tests is not a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Initial commit (with failing test)
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Test still failing
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/fmt_hierarchy.txtar b/claudetool/testdata/fmt_hierarchy.txtar
new file mode 100644
index 0000000..613d716
--- /dev/null
+++ b/claudetool/testdata/fmt_hierarchy.txtar
@@ -0,0 +1,58 @@
+Test autoformatting hierarchy
+
+-- bad.go --
+// This file is not properly gofmt'd.
+ package main
+-- gofmt.go --
+// This file is properly gofmt'd.
+package main
+-- goimports.go --
+// This file is properly goimport'd, but not gofumpt'd.
+package main
+
+import (
+ "fmt"
+)
+
+func main() {
+
+ fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofumpt'd.
+package main
+-- .commit --
+Initial commit
+
+-- bad.go --
+// This file is still not gofmt'd, but we won't complain, because it was already not gofmt'd.
+ package main
+
+ import "fmt"
+-- gofmt.go --
+// This file is no longer properly gofmt'd.
+ package main
+-- goimports.go --
+// If we remove the imports, it no longer is goimport'd.
+// It should thus be flagged as a formatting issue, despite being gofmt'd.
+package main
+
+func main() {
+
+ fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofmt'd, but no longer gofumpt'd.
+package main
+
+func main() {
+
+ fmt.Println("hi")
+}
+-- .commit --
+Mess with formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/gofmt.go
+/PATH/TO/REPO/gofumpt.go
+/PATH/TO/REPO/goimports.go
diff --git a/claudetool/testdata/gopls_issue_unchanged.txtar b/claudetool/testdata/gopls_issue_unchanged.txtar
new file mode 100644
index 0000000..d1de43b
--- /dev/null
+++ b/claudetool/testdata/gopls_issue_unchanged.txtar
@@ -0,0 +1,25 @@
+Pre-existing gopls issues should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+ x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+ x := 42
+ // unrelated change
+}
+
+-- .commit --
+Make a change but keep the same gopls issue
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/gopls_issues.txtar b/claudetool/testdata/gopls_issues.txtar
new file mode 100644
index 0000000..56f1557
--- /dev/null
+++ b/claudetool/testdata/gopls_issues.txtar
@@ -0,0 +1,25 @@
+Detect newly introduced gopls issues
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+func F() {
+ return
+ panic("unreachable")
+}
+
+-- .commit --
+Add file with gopls issues
+
+-- .run_test --
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:5:2-22: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/gopls_line_number_changed.txtar b/claudetool/testdata/gopls_line_number_changed.txtar
new file mode 100644
index 0000000..c7eeb37
--- /dev/null
+++ b/claudetool/testdata/gopls_line_number_changed.txtar
@@ -0,0 +1,26 @@
+Pre-existing gopls issues with line number changes should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+ x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+// Change the line number of the unused variable
+
+func F() {
+ x := 42
+}
+
+-- .commit --
+Add comments that change line numbers
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/gopls_new_issue_with_existing.txtar b/claudetool/testdata/gopls_new_issue_with_existing.txtar
new file mode 100644
index 0000000..d47fb02
--- /dev/null
+++ b/claudetool/testdata/gopls_new_issue_with_existing.txtar
@@ -0,0 +1,36 @@
+Only new gopls issues should be reported when existing issues are present
+
+-- p.go --
+package p
+
+func F() {
+ x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+ x := 42
+}
+
+func G() {
+ panic("i went down down down")
+ panic("burning ring of fire")
+}
+
+-- .commit --
+Add a new function with a new gopls issue
+
+-- .run_test --
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:9:2-31: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/mark_test_skipped.txtar b/claudetool/testdata/mark_test_skipped.txtar
new file mode 100644
index 0000000..bf887e3
--- /dev/null
+++ b/claudetool/testdata/mark_test_skipped.txtar
@@ -0,0 +1,32 @@
+Marking a passing test as skipped test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.SkipNow()
+}
+
+-- .commit --
+Skip test
+
+-- .run_test --
+Test regressions detected between initial commit (851ba0628e0e2d93e27e168d6af336f3e4d375c7) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/multi_commit_review.txtar b/claudetool/testdata/multi_commit_review.txtar
new file mode 100644
index 0000000..2286dee
--- /dev/null
+++ b/claudetool/testdata/multi_commit_review.txtar
@@ -0,0 +1,51 @@
+Test that all issues across multiple commits are reported
+
+-- a.go --
+package main
+
+-- .commit --
+Initial commit
+
+-- b.go --
+package main
+
+import "sync"
+
+var x sync.Mutex
+var y = x
+
+-- .commit --
+Add another file
+
+-- c_test.go --
+package main
+
+import "testing"
+
+func TestX(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Add a failing test
+
+-- d.go --
+package main
+
+-- .commit --
+Add yet another file
+
+-- .run_test --
+Test regressions detected between initial commit (a4fd3c2166b35e02a2cde466880e45aea6e6212e) and HEAD:
+
+1: sketch.dev.TestX: New test is failing
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/b.go:6:9-10: variable declaration copies lock value to y: sync.Mutex
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/multiple_format_issues.txtar b/claudetool/testdata/multiple_format_issues.txtar
new file mode 100644
index 0000000..0024a48
--- /dev/null
+++ b/claudetool/testdata/multiple_format_issues.txtar
@@ -0,0 +1,21 @@
+Test that formatting issues are not reported if pre-existing, per-file
+
+-- main.go --
+ package main
+
+-- .commit --
+Initial commit
+
+-- main.go --
+ package main
+
+ import "fmt"
+
+-- file1.go --
+ package main
+
+-- .commit --
+Add multiple files with formatting issues
+
+-- .run_autoformat --
+/PATH/TO/REPO/file1.go
diff --git a/claudetool/testdata/new_test_build_error.txtar b/claudetool/testdata/new_test_build_error.txtar
new file mode 100644
index 0000000..2fe1d34
--- /dev/null
+++ b/claudetool/testdata/new_test_build_error.txtar
@@ -0,0 +1,27 @@
+Adding a new test with build errors is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ (
+}
+
+-- .commit --
+Add test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/no_fmt_autogenerated.txtar b/claudetool/testdata/no_fmt_autogenerated.txtar
new file mode 100644
index 0000000..aab3f57
--- /dev/null
+++ b/claudetool/testdata/no_fmt_autogenerated.txtar
@@ -0,0 +1,22 @@
+Test that autogenerated files do not get formatted
+
+-- .commit --
+Initial commit
+
+-- generated.go --
+// Code generated DO NOT EDIT.
+
+package main
+
+import (
+_ "fmt"
+)
+
+-- manual.go --
+ package main
+
+-- .commit --
+Add generated and manual files with poor formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/manual.go
diff --git a/claudetool/testdata/no_tests.txtar b/claudetool/testdata/no_tests.txtar
new file mode 100644
index 0000000..ceeb2a5
--- /dev/null
+++ b/claudetool/testdata/no_tests.txtar
@@ -0,0 +1,19 @@
+Going from no tests to no tests is not a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- other.go --
+package p
+
+func F() {
+}
+
+-- .commit --
+Add func
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/no_tests_to_failing_tests.txtar b/claudetool/testdata/no_tests_to_failing_tests.txtar
new file mode 100644
index 0000000..b0cc956
--- /dev/null
+++ b/claudetool/testdata/no_tests_to_failing_tests.txtar
@@ -0,0 +1,27 @@
+Adding failing tests is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Add failing test
+
+-- .run_test --
+Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
+
+1: sketch.dev.TestP: New test is failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_build_error.txtar b/claudetool/testdata/passing_to_build_error.txtar
new file mode 100644
index 0000000..9558862
--- /dev/null
+++ b/claudetool/testdata/passing_to_build_error.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ (
+}
+
+-- .commit --
+Break test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_failing.txtar b/claudetool/testdata/passing_to_failing.txtar
new file mode 100644
index 0000000..31a0556
--- /dev/null
+++ b/claudetool/testdata/passing_to_failing.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_failing_subdir.txtar b/claudetool/testdata/passing_to_failing_subdir.txtar
new file mode 100644
index 0000000..7649554
--- /dev/null
+++ b/claudetool/testdata/passing_to_failing_subdir.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a failing test in a subdir is a regression
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+Test regressions detected between initial commit (8c6c71d0beac5e27b87dfc3f2fd9d274b62c3d3a) and HEAD:
+
+1: sketch.dev/foo.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/skipped_to_build_error.txtar b/claudetool/testdata/skipped_to_build_error.txtar
new file mode 100644
index 0000000..f648572
--- /dev/null
+++ b/claudetool/testdata/skipped_to_build_error.txtar
@@ -0,0 +1,33 @@
+Going from a skipped test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ (
+}
+
+-- .commit --
+Change skipped test to test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/skipped_to_failing.txtar b/claudetool/testdata/skipped_to_failing.txtar
new file mode 100644
index 0000000..50348d2
--- /dev/null
+++ b/claudetool/testdata/skipped_to_failing.txtar
@@ -0,0 +1,33 @@
+Going from a skipped test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+ t.FailNow()
+}
+
+-- .commit --
+Change skipped test to failing test
+
+-- .run_test --
+Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
+
+1: sketch.dev.TestP: Was skipped, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/vet_error_test.txtar b/claudetool/testdata/vet_error_test.txtar
new file mode 100644
index 0000000..8c9dc64
--- /dev/null
+++ b/claudetool/testdata/vet_error_test.txtar
@@ -0,0 +1,36 @@
+Check that vet errors are detected in a test run
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+import (
+ "fmt"
+)
+
+func F() {
+ fmt.Printf("Not a string: %s\n", 10)
+}
+
+-- .commit --
+Add vet error
+
+-- .run_test --
+Test regressions detected between initial commit (656e4d573cb77a30ebce29ebb974533ece282946) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:8:2-38: fmt.Printf format %s has arg 10 of wrong type int
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/teststatus_string.go b/claudetool/teststatus_string.go
new file mode 100644
index 0000000..a5b159b
--- /dev/null
+++ b/claudetool/teststatus_string.go
@@ -0,0 +1,28 @@
+// Code generated by "stringer -type=testStatus -trimprefix=testStatus"; DO NOT EDIT.
+
+package claudetool
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[testStatusUnknown-0]
+ _ = x[testStatusPass-1]
+ _ = x[testStatusFail-2]
+ _ = x[testStatusBuildFail-3]
+ _ = x[testStatusSkip-4]
+ _ = x[testStatusNoTests-5]
+}
+
+const _testStatus_name = "UnknownPassFailBuildFailSkipNoTests"
+
+var _testStatus_index = [...]uint8{0, 7, 11, 15, 24, 28, 35}
+
+func (i testStatus) String() string {
+ if i < 0 || i >= testStatus(len(_testStatus_index)-1) {
+ return "testStatus(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _testStatus_name[_testStatus_index[i]:_testStatus_index[i+1]]
+}