claudetool: improve codereview

Do a bunch of un-vibecoding and bug fixing.
Unfortunately, lots left.
Get rid of vet; gopls check covers it.

Add testing infrastructure and a bunch of fixtures.
diff --git a/claudetool/codereview_test.go b/claudetool/codereview_test.go
new file mode 100644
index 0000000..71934a2
--- /dev/null
+++ b/claudetool/codereview_test.go
@@ -0,0 +1,290 @@
+package claudetool
+
+import (
+	"bytes"
+	"cmp"
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"slices"
+	"strings"
+	"testing"
+
+	"golang.org/x/tools/txtar"
+)
+
+// updateTests is set to true when the -update flag is used.
+// This will update the expected test results instead of failing tests.
+var updateTests = flag.Bool("update", false, "update expected test results instead of failing tests")
+
+// TestCodereviewDifferential runs all the end-to-end tests for the codereview and differential packages.
+// Each test is defined in a .txtar file in the testdata directory.
+func TestCodereviewDifferential(t *testing.T) {
+	entries, err := os.ReadDir("testdata")
+	if err != nil {
+		t.Fatalf("failed to read testdata directory: %v", err)
+	}
+	for _, entry := range entries {
+		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".txtar") {
+			continue
+		}
+		testPath := filepath.Join("testdata", entry.Name())
+		testName := strings.TrimSuffix(entry.Name(), ".txtar")
+		t.Run(testName, func(t *testing.T) {
+			t.Parallel()
+			runE2ETest(t, testPath, *updateTests)
+		})
+	}
+}
+
+// runE2ETest executes a single end-to-end test from a .txtar file.
+func runE2ETest(t *testing.T, testPath string, update bool) {
+	orig, err := os.ReadFile(testPath)
+	if err != nil {
+		t.Fatalf("failed to read test file %s: %v", testPath, err)
+	}
+	archive, err := txtar.ParseFile(testPath)
+	if err != nil {
+		t.Fatalf("failed to parse txtar file %s: %v", testPath, err)
+	}
+
+	tmpDir := t.TempDir()
+	// resolve temp dir path so that we can canonicalize/normalize it later
+	tmpDir = resolveRealPath(tmpDir)
+
+	if err := initGoModule(tmpDir); err != nil {
+		t.Fatalf("failed to initialize Go module: %v", err)
+	}
+	if err := initGitRepo(tmpDir); err != nil {
+		t.Fatalf("failed to initialize git repository: %v", err)
+	}
+	if err := processTestFiles(t, archive, tmpDir, update); err != nil {
+		t.Fatalf("error processing test files: %v", err)
+	}
+
+	// If we're updating, write back the modified archive to the file
+	if update {
+		updatedContent := txtar.Format(archive)
+		// only write back changes, avoids git status churn
+		if !bytes.Equal(orig, updatedContent) {
+			if err := os.WriteFile(testPath, updatedContent, 0o644); err != nil {
+				t.Errorf("Failed to update test file %s: %v", testPath, err)
+			}
+		}
+	}
+}
+
+func gitCommitEnv() []string {
+	return append(os.Environ(),
+		"GIT_AUTHOR_NAME=Test Author",
+		"GIT_AUTHOR_EMAIL=test@example.com",
+		"GIT_COMMITTER_NAME=Test Committer",
+		"GIT_COMMITTER_EMAIL=test@example.com",
+		"GIT_AUTHOR_DATE=2025-01-01T00:00:00Z",
+		"GIT_COMMITTER_DATE=2025-01-01T00:00:00Z",
+	)
+}
+
+// initGitRepo initializes a new git repository in the specified directory.
+func initGitRepo(dir string) error {
+	cmd := exec.Command("git", "init")
+	cmd.Dir = dir
+	err := cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error initializing git repository: %w", err)
+	}
+	// create a single commit out of everything there now
+	cmd = exec.Command("git", "add", ".")
+	cmd.Dir = dir
+	err = cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error staging files: %w", err)
+	}
+	cmd = exec.Command("git", "commit", "-m", "create repo")
+	cmd.Dir = dir
+	cmd.Env = gitCommitEnv()
+	err = cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error making initial commit: %w", err)
+	}
+	return nil
+}
+
+// processTestFiles processes the files in the txtar archive in sequence.
+func processTestFiles(t *testing.T, archive *txtar.Archive, dir string, update bool) error {
+	var initialCommit string
+	filesForNextCommit := make(map[string]bool)
+
+	for i, file := range archive.Files {
+		switch file.Name {
+		case ".commit":
+			commit, err := makeGitCommit(dir, string(file.Data), filesForNextCommit)
+			if err != nil {
+				return fmt.Errorf("error making git commit: %w", err)
+			}
+			clear(filesForNextCommit)
+			initialCommit = cmp.Or(initialCommit, commit)
+			// fmt.Println("initial commit:", initialCommit)
+			// cmd := exec.Command("git", "log")
+			// cmd.Dir = dir
+			// cmd.Stdout = os.Stdout
+			// cmd.Run()
+
+		case ".run_test":
+			got, err := runDifferentialTest(dir, initialCommit)
+			if err != nil {
+				return fmt.Errorf("error running differential test: %w", err)
+			}
+			want := string(file.Data)
+
+			if update {
+				archive.Files[i].Data = []byte(got)
+				break
+			}
+			if strings.TrimSpace(want) != strings.TrimSpace(got) {
+				t.Errorf("Results don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+			}
+
+		case ".run_autoformat":
+			if initialCommit == "" {
+				return fmt.Errorf("initial commit not set, cannot run autoformat")
+			}
+
+			got, err := runAutoformat(dir, initialCommit)
+			if err != nil {
+				return fmt.Errorf("error running autoformat: %w", err)
+			}
+			slices.Sort(got)
+
+			if update {
+				correct := strings.Join(got, "\n") + "\n"
+				archive.Files[i].Data = []byte(correct)
+				break
+			}
+
+			want := strings.Split(strings.TrimSpace(string(file.Data)), "\n")
+			if !slices.Equal(want, got) {
+				t.Errorf("Formatted files don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+			}
+
+		default:
+			filePath := filepath.Join(dir, file.Name)
+			if err := os.MkdirAll(filepath.Dir(filePath), 0o700); err != nil {
+				return fmt.Errorf("error creating directory for %s: %w", file.Name, err)
+			}
+			data := file.Data
+			// Remove second trailing newline if present.
+			// An annoyance of the txtar format, messes with gofmt.
+			if bytes.HasSuffix(data, []byte("\n\n")) {
+				data = bytes.TrimSuffix(data, []byte("\n"))
+			}
+			if err := os.WriteFile(filePath, file.Data, 0o600); err != nil {
+				return fmt.Errorf("error writing file %s: %w", file.Name, err)
+			}
+			filesForNextCommit[file.Name] = true
+		}
+	}
+
+	return nil
+}
+
+// makeGitCommit commits the specified files with the given message.
+// Returns the commit hash.
+func makeGitCommit(dir, message string, files map[string]bool) (string, error) {
+	for file := range files {
+		cmd := exec.Command("git", "add", file)
+		cmd.Dir = dir
+		if err := cmd.Run(); err != nil {
+			return "", fmt.Errorf("error staging file %s: %w", file, err)
+		}
+	}
+	message = cmp.Or(message, "Test commit")
+
+	// Make the commit with fixed author, committer, and date for stable hashes
+	cmd := exec.Command("git", "commit", "--allow-empty", "-m", message)
+	cmd.Dir = dir
+	cmd.Env = gitCommitEnv()
+	if err := cmd.Run(); err != nil {
+		return "", fmt.Errorf("error making commit: %w", err)
+	}
+
+	// Get the commit hash
+	cmd = exec.Command("git", "rev-parse", "HEAD")
+	cmd.Dir = dir
+	out, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("error getting commit hash: %w", err)
+	}
+
+	return strings.TrimSpace(string(out)), nil
+}
+
+// runDifferentialTest runs the code review tool on the repository and returns the result.
+func runDifferentialTest(dir, initialCommit string) (string, error) {
+	if initialCommit == "" {
+		return "", fmt.Errorf("initial commit not set, cannot run differential test")
+	}
+
+	// Create a code reviewer for the repository
+	ctx := context.Background()
+	reviewer, err := NewCodeReviewer(ctx, dir, initialCommit)
+	if err != nil {
+		return "", fmt.Errorf("error creating code reviewer: %w", err)
+	}
+
+	// Run the code review
+	result, err := reviewer.Run(ctx, nil)
+	if err != nil {
+		return "", fmt.Errorf("error running code review: %w", err)
+	}
+
+	// Normalize paths in the result
+	normalized := normalizePaths(result, dir)
+	return normalized, nil
+}
+
+// normalizePaths replaces the temp directory paths with a standard placeholder
+func normalizePaths(result string, tempDir string) string {
+	return strings.ReplaceAll(result, tempDir, "/PATH/TO/REPO")
+}
+
+// initGoModule initializes a Go module in the specified directory.
+func initGoModule(dir string) error {
+	cmd := exec.Command("go", "mod", "init", "sketch.dev")
+	cmd.Dir = dir
+	return cmd.Run()
+}
+
+// runAutoformat runs the autoformat function on the repository and returns the list of formatted files.
+func runAutoformat(dir, initialCommit string) ([]string, error) {
+	if initialCommit == "" {
+		return nil, fmt.Errorf("initial commit not set, cannot run autoformat")
+	}
+	ctx := context.Background()
+	reviewer, err := NewCodeReviewer(ctx, dir, initialCommit)
+	if err != nil {
+		return nil, fmt.Errorf("error creating code reviewer: %w", err)
+	}
+	formattedFiles := reviewer.Autoformat(ctx)
+	normalizedFiles := make([]string, len(formattedFiles))
+	for i, file := range formattedFiles {
+		normalizedFiles[i] = normalizePaths(file, dir)
+	}
+	slices.Sort(normalizedFiles)
+	return normalizedFiles, nil
+}
+
+// resolveRealPath follows symlinks and returns the real path
+// This handles platform-specific behaviors like macOS's /private prefix
+func resolveRealPath(path string) string {
+	// Follow symlinks to get the real path
+	realPath, err := filepath.EvalSymlinks(path)
+	if err != nil {
+		// If we can't resolve symlinks, just return the original path
+		return path
+	}
+	return realPath
+}
diff --git a/claudetool/differential.go b/claudetool/differential.go
index 14dce04..014fc13 100644
--- a/claudetool/differential.go
+++ b/claudetool/differential.go
@@ -6,6 +6,7 @@
 	"encoding/json"
 	"fmt"
 	"io"
+	"io/fs"
 	"log/slog"
 	"maps"
 	"os"
@@ -69,14 +70,13 @@
 	// The packages in the initial commit may be different.
 	// Good enough for now.
 	// TODO: do better
-	directPkgs, allPkgs, err := r.packagesForFiles(ctx, changedFiles)
+	allPkgs, err := r.packagesForFiles(ctx, changedFiles)
 	if err != nil {
 		// TODO: log and skip to stuff that doesn't require packages
 		slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
 		return "", err
 	}
 	allPkgList := slices.Collect(maps.Keys(allPkgs))
-	directPkgList := slices.Collect(maps.Keys(directPkgs))
 
 	var msgs []string
 
@@ -89,16 +89,7 @@
 		msgs = append(msgs, testMsg)
 	}
 
-	vetMsg, err := r.checkVet(ctx, directPkgList)
-	if err != nil {
-		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check vet", "err", err)
-		return "", err
-	}
-	if vetMsg != "" {
-		msgs = append(msgs, vetMsg)
-	}
-
-	goplsMsg, err := r.checkGopls(ctx, changedFiles)
+	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
 	if err != nil {
 		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
 		return "", err
@@ -111,6 +102,8 @@
 		slog.DebugContext(ctx, "CodeReviewer.Run: no issues found")
 		return "OK", nil
 	}
+
+	msgs = append(msgs, "Please fix before proceeding.")
 	slog.DebugContext(ctx, "CodeReviewer.Run: found issues", "issues", msgs)
 	return strings.Join(msgs, "\n\n"), nil
 }
@@ -139,10 +132,9 @@
 
 	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
 	afterTestCmd.Dir = r.repoRoot
-	afterTestOut, afterTestErr := afterTestCmd.Output()
-	if afterTestErr == nil {
-		return "", nil // all tests pass, we're good!
-	}
+	afterTestOut, _ := afterTestCmd.Output()
+	// unfortunately, we can't short-circuit here even if all tests pass,
+	// because we need to check for skipped tests.
 
 	err := r.initializeInitialCommitWorktree(ctx)
 	if err != nil {
@@ -162,7 +154,6 @@
 	if afterParseErr != nil {
 		return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
 	}
-
 	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
 	if err != nil {
 		return "", fmt.Errorf("failed to compare test results: %w", err)
@@ -172,281 +163,6 @@
 	return res, nil
 }
 
-// VetIssue represents a single issue found by go vet
-type VetIssue struct {
-	Position string `json:"posn"`
-	Message  string `json:"message"`
-	// Ignoring suggested_fixes for now as we don't need them for comparison
-}
-
-// VetResult represents the JSON output of go vet -json for a single package
-type VetResult map[string][]VetIssue // category -> issues
-
-// VetResults represents the full JSON output of go vet -json
-type VetResults map[string]VetResult // package path -> result
-
-// checkVet runs go vet on the provided packages in both the current and initial state,
-// compares the results, and reports any new vet issues introduced in the current state.
-func (r *CodeReviewer) checkVet(ctx context.Context, pkgList []string) (string, error) {
-	if len(pkgList) == 0 {
-		return "", nil // no packages to check
-	}
-
-	// Run vet on the current state with JSON output
-	goVetArgs := []string{"vet", "-json"}
-	goVetArgs = append(goVetArgs, pkgList...)
-
-	afterVetCmd := exec.CommandContext(ctx, "go", goVetArgs...)
-	afterVetCmd.Dir = r.repoRoot
-	afterVetOut, afterVetErr := afterVetCmd.CombinedOutput() // ignore error, we'll parse the output regar
-	if afterVetErr != nil {
-		slog.WarnContext(ctx, "CodeReviewer.checkVet: (after) go vet failed", "err", afterVetErr, "output", string(afterVetOut))
-		return "", nil // nothing more we can do here
-	}
-
-	// Parse the JSON output (even if vet returned an error, as it does when issues are found)
-	afterVetResults, err := parseVetJSON(afterVetOut)
-	if err != nil {
-		return "", fmt.Errorf("failed to parse vet output for current state: %w", err)
-	}
-
-	// If no issues were found, we're done
-	if len(afterVetResults) == 0 || !vetResultsHaveIssues(afterVetResults) {
-		return "", nil
-	}
-
-	// Vet detected issues in the current state, check if they existed in the initial state
-	err = r.initializeInitialCommitWorktree(ctx)
-	if err != nil {
-		return "", err
-	}
-
-	beforeVetCmd := exec.CommandContext(ctx, "go", goVetArgs...)
-	beforeVetCmd.Dir = r.initialWorktree
-	beforeVetOut, _ := beforeVetCmd.CombinedOutput() // ignore error, we'll parse the output anyway
-
-	// Parse the JSON output for the initial state
-	beforeVetResults, err := parseVetJSON(beforeVetOut)
-	if err != nil {
-		return "", fmt.Errorf("failed to parse vet output for initial state: %w", err)
-	}
-
-	// Find new issues that weren't present in the initial state
-	vetRegressions := findVetRegressions(beforeVetResults, afterVetResults)
-	if !vetResultsHaveIssues(vetRegressions) {
-		return "", nil // no new issues
-	}
-
-	// Format the results
-	return formatVetRegressions(vetRegressions), nil
-}
-
-// parseVetJSON parses the JSON output from go vet -json
-func parseVetJSON(output []byte) (VetResults, error) {
-	// The output contains multiple JSON objects, one per package
-	// We need to parse them separately
-	results := make(VetResults)
-
-	// Process the output by collecting JSON chunks between # comment lines
-	lines := strings.Split(string(output), "\n")
-	currentChunk := strings.Builder{}
-
-	// Helper function to process accumulated JSON chunks
-	processChunk := func() {
-		chunk := strings.TrimSpace(currentChunk.String())
-		if chunk == "" || !strings.HasPrefix(chunk, "{") {
-			return // Skip empty chunks or non-JSON chunks
-		}
-
-		// Try to parse the chunk as JSON
-		var result VetResults
-		if err := json.Unmarshal([]byte(chunk), &result); err != nil {
-			return // Skip invalid JSON
-		}
-
-		// Merge with our results
-		for pkg, issues := range result {
-			results[pkg] = issues
-		}
-
-		// Reset the chunk builder
-		currentChunk.Reset()
-	}
-
-	// Process lines
-	for _, line := range lines {
-		// If we hit a comment line, process the previous chunk and start a new one
-		if strings.HasPrefix(strings.TrimSpace(line), "#") {
-			processChunk()
-			continue
-		}
-
-		// Add the line to the current chunk
-		currentChunk.WriteString(line)
-		currentChunk.WriteString("\n")
-	}
-
-	// Process the final chunk
-	processChunk()
-
-	return results, nil
-}
-
-// vetResultsHaveIssues checks if there are any actual issues in the vet results
-func vetResultsHaveIssues(results VetResults) bool {
-	for _, pkgResult := range results {
-		for _, issues := range pkgResult {
-			if len(issues) > 0 {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// findVetRegressions identifies vet issues that are new in the after state
-func findVetRegressions(before, after VetResults) VetResults {
-	regressions := make(VetResults)
-
-	// Go through all packages in the after state
-	for pkgPath, afterPkgResults := range after {
-		beforePkgResults, pkgExistedBefore := before[pkgPath]
-
-		// Initialize package in regressions if it has issues
-		if !pkgExistedBefore {
-			// If the package didn't exist before, all issues are new
-			regressions[pkgPath] = afterPkgResults
-			continue
-		}
-
-		// Compare issues by category
-		for category, afterIssues := range afterPkgResults {
-			beforeIssues, categoryExistedBefore := beforePkgResults[category]
-
-			if !categoryExistedBefore {
-				// If this category didn't exist before, all issues are new
-				if regressions[pkgPath] == nil {
-					regressions[pkgPath] = make(VetResult)
-				}
-				regressions[pkgPath][category] = afterIssues
-				continue
-			}
-
-			// Compare individual issues
-			var newIssues []VetIssue
-			for _, afterIssue := range afterIssues {
-				if !issueExistsIn(afterIssue, beforeIssues) {
-					newIssues = append(newIssues, afterIssue)
-				}
-			}
-
-			// Add new issues to regressions
-			if len(newIssues) > 0 {
-				if regressions[pkgPath] == nil {
-					regressions[pkgPath] = make(VetResult)
-				}
-				regressions[pkgPath][category] = newIssues
-			}
-		}
-	}
-
-	return regressions
-}
-
-// issueExistsIn checks if an issue already exists in a list of issues
-// using a looser comparison that's resilient to position changes
-func issueExistsIn(issue VetIssue, issues []VetIssue) bool {
-	issueFile := extractFilePath(issue.Position)
-
-	for _, existing := range issues {
-		// Main comparison is by message content, which is likely stable
-		if issue.Message == existing.Message {
-			// If messages match exactly, consider it the same issue even if position changed
-			return true
-		}
-
-		// As a secondary check, if the issue is in the same file and has similar message,
-		// it's likely the same issue that might have been slightly reworded or relocated
-		existingFile := extractFilePath(existing.Position)
-		if issueFile == existingFile && messagesSimilar(issue.Message, existing.Message) {
-			return true
-		}
-	}
-	return false
-}
-
-// extractFilePath gets just the file path from a position string like "/path/to/file.go:10:15"
-func extractFilePath(position string) string {
-	parts := strings.Split(position, ":")
-	if len(parts) >= 1 {
-		return parts[0]
-	}
-	return position // fallback to the full position if we can't parse it
-}
-
-// messagesSimilar checks if two messages are similar enough to be considered the same issue
-// This is a simple implementation that could be enhanced with more sophisticated text comparison
-func messagesSimilar(msg1, msg2 string) bool {
-	// For now, simple similarity check: if one is a substring of the other
-	return strings.Contains(msg1, msg2) || strings.Contains(msg2, msg1)
-}
-
-// formatVetRegressions generates a human-readable summary of vet regressions
-func formatVetRegressions(regressions VetResults) string {
-	if !vetResultsHaveIssues(regressions) {
-		return ""
-	}
-
-	var sb strings.Builder
-	sb.WriteString("Go vet issues detected:\n\n")
-
-	// Get sorted list of packages for deterministic output
-	pkgPaths := make([]string, 0, len(regressions))
-	for pkgPath := range regressions {
-		pkgPaths = append(pkgPaths, pkgPath)
-	}
-	slices.Sort(pkgPaths)
-
-	issueCount := 1
-	for _, pkgPath := range pkgPaths {
-		pkgResult := regressions[pkgPath]
-
-		// Get sorted list of categories
-		categories := make([]string, 0, len(pkgResult))
-		for category := range pkgResult {
-			categories = append(categories, category)
-		}
-		slices.Sort(categories)
-
-		for _, category := range categories {
-			issues := pkgResult[category]
-
-			// Skip empty issue lists (shouldn't happen, but just in case)
-			if len(issues) == 0 {
-				continue
-			}
-
-			// Sort issues by position for deterministic output
-			slices.SortFunc(issues, func(a, b VetIssue) int {
-				return strings.Compare(a.Position, b.Position)
-			})
-
-			// Format each issue
-			for _, issue := range issues {
-				sb.WriteString(fmt.Sprintf("%d. [%s] %s: %s\n",
-					issueCount,
-					category,
-					issue.Position,
-					issue.Message))
-				issueCount++
-			}
-		}
-	}
-
-	sb.WriteString("\nPlease fix these issues before proceeding.")
-	return sb.String()
-}
-
 // GoplsIssue represents a single issue reported by gopls check
 type GoplsIssue struct {
 	Position string // File position in format "file:line:col-range"
@@ -496,7 +212,7 @@
 	}
 
 	// Parse the output
-	afterIssues := parseGoplsOutput(afterGoplsOut)
+	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
 
 	// If no issues were found, we're done
 	if len(afterIssues) == 0 {
@@ -530,22 +246,18 @@
 	}
 
 	// Run gopls check on the files that existed in the initial commit
-	beforeIssues := []GoplsIssue{}
+	var beforeIssues []GoplsIssue
 	if len(initialFilesToCheck) > 0 {
 		beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
 		beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
 		beforeGoplsCmd.Dir = r.initialWorktree
-		var beforeGoplsOut []byte
-		var beforeCmdErr error
-		beforeGoplsOut, beforeCmdErr = beforeGoplsCmd.CombinedOutput()
+		beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
 		if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
 			// If gopls fails to run properly on the initial commit, log a warning and continue
 			// with empty before issues - this will be conservative and report more issues
-			slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit",
-				"err", err, "output", string(beforeGoplsOut))
-			// Continue with empty beforeIssues
+			slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
 		} else {
-			beforeIssues = parseGoplsOutput(beforeGoplsOut)
+			beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
 		}
 	}
 
@@ -556,12 +268,12 @@
 	}
 
 	// Format the results
-	return formatGoplsRegressions(goplsRegressions), nil
+	return r.formatGoplsRegressions(goplsRegressions), nil
 }
 
 // parseGoplsOutput parses the text output from gopls check
 // Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
-func parseGoplsOutput(output []byte) []GoplsIssue {
+func parseGoplsOutput(root string, output []byte) []GoplsIssue {
 	var issues []GoplsIssue
 	lines := strings.Split(string(output), "\n")
 
@@ -600,6 +312,10 @@
 
 		// Extract position and message
 		position := line[:messageStart]
+		rel, err := filepath.Rel(root, position)
+		if err == nil {
+			position = rel
+		}
 		message := line[messageStart+2:] // Skip the ': ' separator
 
 		// Verify position has the expected format (at least 2 colons for line:col)
@@ -717,7 +433,7 @@
 }
 
 // formatGoplsRegressions generates a human-readable summary of gopls check regressions
-func formatGoplsRegressions(regressions []GoplsIssue) string {
+func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
 	if len(regressions) == 0 {
 		return ""
 	}
@@ -727,11 +443,11 @@
 
 	// Format each issue
 	for i, issue := range regressions {
-		sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, issue.Position, issue.Message))
+		sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
 	}
 
-	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. ")
-	sb.WriteString("Do not change existing code that was not part of your current edits.")
+	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
+	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
 	return sb.String()
 }
 
@@ -748,10 +464,10 @@
 // 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
 // It may include false positives.
 // Files must be absolute paths!
-func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (directPkgs, allPkgs map[string]*packages.Package, err error) {
+func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
 	for _, f := range files {
 		if !filepath.IsAbs(f) {
-			return nil, nil, fmt.Errorf("path %q is not absolute", f)
+			return nil, fmt.Errorf("path %q is not absolute", f)
 		}
 	}
 	cfg := &packages.Config{
@@ -767,14 +483,12 @@
 	}
 	universe, err := packages.Load(cfg, "./...")
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}
 	// Identify packages that directly contain the changed files
-	directPkgs = make(map[string]*packages.Package) // import path -> package
+	directPkgs := make(map[string]*packages.Package) // import path -> package
 	for _, pkg := range universe {
-		// fmt.Println("pkg:", pkg.PkgPath)
 		pkgFiles := allFiles(pkg)
-		// fmt.Println("pkgFiles:", pkgFiles)
 		for _, file := range files {
 			if pkgFiles[file] {
 				// prefer test packages, as they contain strictly more files (right?)
@@ -786,27 +500,35 @@
 		}
 	}
 
-	// Create a copy of directPkgs to expand with dependencies
-	allPkgs = make(map[string]*packages.Package)
-	for k, v := range directPkgs {
-		allPkgs[k] = v
-	}
+	allPkgs = maps.Clone(directPkgs)
 
 	// Add packages that depend on the direct packages
 	addDependentPackages(universe, allPkgs)
-	return directPkgs, allPkgs, nil
+	return allPkgs, nil
 }
 
 // allFiles returns all files that might be referenced by the package.
 // It may contain false positives.
 func allFiles(p *packages.Package) map[string]bool {
 	files := make(map[string]bool)
+	// Add files from package info
 	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
 	for _, extra := range add {
 		for _, file := range extra {
 			files[file] = true
 		}
 	}
+	// Add files from testdata directory
+	testdataDir := filepath.Join(p.Dir, "testdata")
+	if _, err := os.Stat(testdataDir); err == nil {
+		fsys := os.DirFS(p.Dir)
+		fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
+			if err == nil && !d.IsDir() {
+				files[filepath.Join(p.Dir, path)] = true
+			}
+			return nil
+		})
+	}
 	return files
 }
 
@@ -816,6 +538,10 @@
 	for {
 		changed := false
 		for _, p := range universe {
+			if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
+				// skip test packages
+				continue
+			}
 			if _, ok := pkgs[p.PkgPath]; ok {
 				// already in pkgs
 				continue
@@ -910,146 +636,151 @@
 // testStatus represents the status of a test in a given commit
 type testStatus int
 
+//go:generate go tool stringer -type=testStatus -trimprefix=testStatus
 const (
 	testStatusUnknown testStatus = iota
 	testStatusPass
 	testStatusFail
 	testStatusBuildFail
 	testStatusSkip
+	testStatusNoTests // no tests exist for this package
 )
 
-// testInfo represents information about a specific test
-type testInfo struct {
-	Package string
-	Test    string // empty for package tests
-}
-
-// String returns a human-readable string representation of the test
-func (t testInfo) String() string {
-	if t.Test == "" {
-		return t.Package
-	}
-	return fmt.Sprintf("%s.%s", t.Package, t.Test)
-}
-
 // testRegression represents a test that regressed between commits
 type testRegression struct {
-	Info         testInfo
+	Package      string
+	Test         string // empty for package tests
 	BeforeStatus testStatus
 	AfterStatus  testStatus
 	Output       string // failure output in the after state
 }
 
-// collectTestStatuses processes a slice of test events and returns a map of test statuses
-func collectTestStatuses(results []testJSON) map[testInfo]testStatus {
-	statuses := make(map[testInfo]testStatus)
-	failedBuilds := make(map[string]bool)      // track packages with build failures
-	testOutputs := make(map[testInfo][]string) // collect output for failing tests
-
-	// First pass: identify build failures
-	for _, result := range results {
-		if result.Action == "fail" && result.FailedBuild != "" {
-			failedBuilds[result.FailedBuild] = true
-		}
+func (r *testRegression) Source() string {
+	if r.Test == "" {
+		return r.Package
 	}
+	return fmt.Sprintf("%s.%s", r.Package, r.Test)
+}
 
-	// Second pass: collect test statuses
-	for _, result := range results {
-		info := testInfo{Package: result.Package, Test: result.Test}
+type packageResult struct {
+	Status     testStatus            // overall status for the package
+	TestStatus map[string]testStatus // name -> status
+	TestOutput map[string][]string   // name -> output parts
+}
 
-		// Skip output events for now, we'll process them in a separate pass
-		if result.Action == "output" {
-			if result.Test != "" { // only collect output for actual tests, not package messages
-				testOutputs[info] = append(testOutputs[info], result.Output)
-			}
-			continue
+// collectTestStatuses processes a slice of test events and returns rich status information
+func collectTestStatuses(results []testJSON) map[string]*packageResult {
+	m := make(map[string]*packageResult)
+
+	for _, event := range results {
+		pkg := event.Package
+		p, ok := m[pkg]
+		if !ok {
+			p = new(packageResult)
+			p.TestStatus = make(map[string]testStatus)
+			p.TestOutput = make(map[string][]string)
+			m[pkg] = p
 		}
 
-		// Handle BuildEvent output
-		if result.Action == "build-fail" {
-			// Mark all tests in this package as build failures
-			for ti := range statuses {
-				if ti.Package == result.ImportPath {
-					statuses[ti] = testStatusBuildFail
-				}
-			}
+		switch event.Action {
+		case "output":
+			p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
 			continue
-		}
-
-		// Check if the package has a build failure
-		if _, hasBuildFailure := failedBuilds[result.Package]; hasBuildFailure {
-			statuses[info] = testStatusBuildFail
-			continue
-		}
-
-		// Handle test events
-		switch result.Action {
 		case "pass":
-			statuses[info] = testStatusPass
+			if event.Test == "" {
+				p.Status = testStatusPass
+			} else {
+				p.TestStatus[event.Test] = testStatusPass
+			}
 		case "fail":
-			statuses[info] = testStatusFail
+			if event.Test == "" {
+				if event.FailedBuild != "" {
+					p.Status = testStatusBuildFail
+				} else {
+					p.Status = testStatusFail
+				}
+			} else {
+				p.TestStatus[event.Test] = testStatusFail
+			}
 		case "skip":
-			statuses[info] = testStatusSkip
+			if event.Test == "" {
+				p.Status = testStatusNoTests
+			} else {
+				p.TestStatus[event.Test] = testStatusSkip
+			}
 		}
 	}
 
-	return statuses
+	return m
 }
 
 // compareTestResults identifies tests that have regressed between commits
 func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
-	beforeStatuses := collectTestStatuses(beforeResults)
-	afterStatuses := collectTestStatuses(afterResults)
+	before := collectTestStatuses(beforeResults)
+	after := collectTestStatuses(afterResults)
+	var testLevelRegressions []testRegression
+	var packageLevelRegressions []testRegression
 
-	// Collect output for failing tests
-	testOutputMap := make(map[testInfo]string)
-	for _, result := range afterResults {
-		if result.Action == "output" {
-			info := testInfo{Package: result.Package, Test: result.Test}
-			testOutputMap[info] += result.Output
+	afterPkgs := slices.Sorted(maps.Keys(after))
+	for _, pkg := range afterPkgs {
+		afterResult := after[pkg]
+		afterStatus := afterResult.Status
+		// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
+		beforeResult, ok := before[pkg]
+		beforeStatus := testStatusNoTests
+		if ok {
+			beforeStatus = beforeResult.Status
+		}
+		// If things no longer build, stop at the package level.
+		// Otherwise, proceed to the test level, so we have more precise information.
+		if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
+			packageLevelRegressions = append(packageLevelRegressions, testRegression{
+				Package:      pkg,
+				BeforeStatus: beforeStatus,
+				AfterStatus:  afterStatus,
+			})
+			continue
+		}
+		tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
+		for _, test := range tests {
+			afterStatus := afterResult.TestStatus[test]
+			switch afterStatus {
+			case testStatusPass:
+				continue
+			case testStatusUnknown:
+				slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
+				continue
+			}
+			beforeStatus := beforeResult.TestStatus[test]
+			if isRegression(beforeStatus, afterStatus) {
+				testLevelRegressions = append(testLevelRegressions, testRegression{
+					Package:      pkg,
+					Test:         test,
+					BeforeStatus: beforeStatus,
+					AfterStatus:  afterStatus,
+					Output:       strings.Join(afterResult.TestOutput[test], ""),
+				})
+			}
 		}
 	}
 
+	// If we have test-level regressions, report only those
+	// Otherwise, report package-level regressions
 	var regressions []testRegression
-
-	// Look for tests that regressed
-	for info, afterStatus := range afterStatuses {
-		// Skip tests that are passing or skipped in the after state
-		if afterStatus == testStatusPass || afterStatus == testStatusSkip {
-			continue
-		}
-
-		// Get the before status (default to unknown if not present)
-		beforeStatus, exists := beforeStatuses[info]
-		if !exists {
-			beforeStatus = testStatusUnknown
-		}
-
-		// Log warning if we encounter unexpected unknown status in the 'after' state
-		if afterStatus == testStatusUnknown {
-			slog.WarnContext(context.Background(), "Unexpected unknown test status encountered",
-				"package", info.Package, "test", info.Test)
-		}
-
-		// Check for regressions
-		if isRegression(beforeStatus, afterStatus) {
-			regressions = append(regressions, testRegression{
-				Info:         info,
-				BeforeStatus: beforeStatus,
-				AfterStatus:  afterStatus,
-				Output:       testOutputMap[info],
-			})
-		}
+	if len(testLevelRegressions) > 0 {
+		regressions = testLevelRegressions
+	} else {
+		regressions = packageLevelRegressions
 	}
 
 	// Sort regressions for consistent output
 	slices.SortFunc(regressions, func(a, b testRegression) int {
 		// First by package
-		if c := strings.Compare(a.Info.Package, b.Info.Package); c != 0 {
+		if c := strings.Compare(a.Package, b.Package); c != 0 {
 			return c
 		}
 		// Then by test name
-		return strings.Compare(a.Info.Test, b.Info.Test)
+		return strings.Compare(a.Test, b.Test)
 	})
 
 	return regressions, nil
@@ -1058,13 +789,34 @@
 // badnessLevels maps test status to a badness level
 // Higher values indicate worse status (more severe issues)
 var badnessLevels = map[testStatus]int{
-	testStatusBuildFail: 4, // Worst
-	testStatusFail:      3,
-	testStatusSkip:      2,
+	testStatusBuildFail: 5, // Worst
+	testStatusFail:      4,
+	testStatusSkip:      3,
+	testStatusNoTests:   2,
 	testStatusPass:      1,
 	testStatusUnknown:   0, // Least bad - avoids false positives
 }
 
+// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
+type regressionKey struct {
+	before, after testStatus
+}
+
+var regressionMessages = map[regressionKey]string{
+	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
+	{testStatusUnknown, testStatusFail}:      "New test is failing",
+	{testStatusUnknown, testStatusSkip}:      "New test is skipped",
+	{testStatusPass, testStatusBuildFail}:    "Was passing, now has build/vet errors",
+	{testStatusPass, testStatusFail}:         "Was passing, now failing",
+	{testStatusPass, testStatusSkip}:         "Was passing, now skipped",
+	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
+	{testStatusNoTests, testStatusFail}:      "Previously had no tests, now has failing tests",
+	{testStatusNoTests, testStatusSkip}:      "Previously had no tests, now has skipped tests",
+	{testStatusSkip, testStatusBuildFail}:    "Was skipped, now has build/vet errors",
+	{testStatusSkip, testStatusFail}:         "Was skipped, now failing",
+	{testStatusFail, testStatusBuildFail}:    "Was failing, now has build/vet errors",
+}
+
 // isRegression determines if a test has regressed based on before and after status
 // A regression is defined as an increase in badness level
 func isRegression(before, after testStatus) bool {
@@ -1078,48 +830,18 @@
 		return ""
 	}
 
-	var sb strings.Builder
-	sb.WriteString(fmt.Sprintf("Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit))
+	buf := new(strings.Builder)
+	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
 
 	for i, reg := range regressions {
-		// Describe the regression
-		sb.WriteString(fmt.Sprintf("%d. %s: ", i+1, reg.Info.String()))
-
-		switch {
-		case reg.BeforeStatus == testStatusUnknown && reg.AfterStatus == testStatusFail:
-			sb.WriteString("New test is failing")
-		case reg.BeforeStatus == testStatusUnknown && reg.AfterStatus == testStatusBuildFail:
-			sb.WriteString("New test has build errors")
-		case reg.BeforeStatus == testStatusPass && reg.AfterStatus == testStatusFail:
-			sb.WriteString("Was passing, now failing")
-		case reg.BeforeStatus == testStatusPass && reg.AfterStatus == testStatusBuildFail:
-			sb.WriteString("Was passing, now has build errors")
-		case reg.BeforeStatus == testStatusSkip && reg.AfterStatus == testStatusFail:
-			sb.WriteString("Was skipped, now failing")
-		case reg.BeforeStatus == testStatusSkip && reg.AfterStatus == testStatusBuildFail:
-			sb.WriteString("Was skipped, now has build errors")
-		default:
-			sb.WriteString("Regression detected")
+		fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
+		key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
+		message, exists := regressionMessages[key]
+		if !exists {
+			message = "Regression detected"
 		}
-		sb.WriteString("\n")
-
-		// Add failure output with indentation for readability
-		if reg.Output != "" {
-			outputLines := strings.Split(strings.TrimSpace(reg.Output), "\n")
-			// Limit output to first 10 lines to avoid overwhelming feedback
-			shownLines := min(len(outputLines), 10)
-
-			sb.WriteString("   Output:\n")
-			for _, line := range outputLines[:shownLines] {
-				sb.WriteString(fmt.Sprintf("   | %s\n", line))
-			}
-			if shownLines < len(outputLines) {
-				sb.WriteString(fmt.Sprintf("   | ... (%d more lines)\n", len(outputLines)-shownLines))
-			}
-		}
-		sb.WriteString("\n")
+		fmt.Fprintf(buf, "%s\n", message)
 	}
 
-	sb.WriteString("Please fix these test failures before proceeding.")
-	return sb.String()
+	return buf.String()
 }
diff --git a/claudetool/testdata/add_skipped_test.txtar b/claudetool/testdata/add_skipped_test.txtar
new file mode 100644
index 0000000..95000cc
--- /dev/null
+++ b/claudetool/testdata/add_skipped_test.txtar
@@ -0,0 +1,27 @@
+Adding a skipped test is a regression
+
+-- p_test.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Add skipped test
+
+-- .run_test --
+Test regressions detected between initial commit (2cdfdc5009e364add9a263909472f85eb08480ed) and HEAD:
+
+1: sketch.dev.TestP: New test is skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/basic_gofmt.txtar b/claudetool/testdata/basic_gofmt.txtar
new file mode 100644
index 0000000..c32ebe9
--- /dev/null
+++ b/claudetool/testdata/basic_gofmt.txtar
@@ -0,0 +1,13 @@
+Test autoformatting happens
+
+-- .commit --
+Initial commit
+
+-- p.go --
+    package p
+
+-- .commit --
+Add non-gofmt file
+
+-- .run_autoformat --
+/PATH/TO/REPO/p.go
diff --git a/claudetool/testdata/empty_testdir_add_file.txtar b/claudetool/testdata/empty_testdir_add_file.txtar
new file mode 100644
index 0000000..b754f3d
--- /dev/null
+++ b/claudetool/testdata/empty_testdir_add_file.txtar
@@ -0,0 +1,37 @@
+Test adding a non-empty file to testdata when test expects empty files
+
+-- p_test.go --
+package p
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestEmptyTestdata(t *testing.T) {
+	files, _ := filepath.Glob("testdata/*")
+	for _, path := range files {
+		data, _ := os.ReadFile(path)
+		if len(data) > 0 {
+			t.Fatalf("testdata file is not empty: %s", path)
+		}
+	}
+}
+
+-- testdata/empty --
+-- .commit --
+Initial commit with empty testdata file
+
+-- testdata/nonempty --
+hi
+-- .commit --
+Add non-empty file to testdata
+
+-- .run_test --
+Test regressions detected between initial commit (cdc9ec6dfb8669c11d8aa0df49c72112627784dc) and HEAD:
+
+1: sketch.dev.TestEmptyTestdata: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/failing_to_failing.txtar b/claudetool/testdata/failing_to_failing.txtar
new file mode 100644
index 0000000..eea1b59
--- /dev/null
+++ b/claudetool/testdata/failing_to_failing.txtar
@@ -0,0 +1,28 @@
+Going from failing tests to failing tests is not a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Initial commit (with failing test)
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Test still failing
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/fmt_hierarchy.txtar b/claudetool/testdata/fmt_hierarchy.txtar
new file mode 100644
index 0000000..613d716
--- /dev/null
+++ b/claudetool/testdata/fmt_hierarchy.txtar
@@ -0,0 +1,58 @@
+Test autoformatting hierarchy
+
+-- bad.go --
+// This file is not properly gofmt'd.
+	package main
+-- gofmt.go --
+// This file is properly gofmt'd.
+package main
+-- goimports.go --
+// This file is properly goimport'd, but not gofumpt'd.
+package main
+
+import (
+	"fmt"
+)
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofumpt'd.
+package main
+-- .commit --
+Initial commit
+
+-- bad.go --
+// This file is still not gofmt'd, but we won't complain, because it was already not gofmt'd.
+	package main
+
+	import "fmt"
+-- gofmt.go --
+// This file is no longer properly gofmt'd.
+	package main
+-- goimports.go --
+// If we remove the imports, it no longer is goimport'd.
+// It should thus be flagged as a formatting issue, despite being gofmt'd.
+package main
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofmt'd, but no longer gofumpt'd.
+package main
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- .commit --
+Mess with formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/gofmt.go
+/PATH/TO/REPO/gofumpt.go
+/PATH/TO/REPO/goimports.go
diff --git a/claudetool/testdata/gopls_issue_unchanged.txtar b/claudetool/testdata/gopls_issue_unchanged.txtar
new file mode 100644
index 0000000..d1de43b
--- /dev/null
+++ b/claudetool/testdata/gopls_issue_unchanged.txtar
@@ -0,0 +1,25 @@
+Pre-existing gopls issues should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+	// unrelated change
+}
+
+-- .commit --
+Make a change but keep the same gopls issue
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/gopls_issues.txtar b/claudetool/testdata/gopls_issues.txtar
new file mode 100644
index 0000000..56f1557
--- /dev/null
+++ b/claudetool/testdata/gopls_issues.txtar
@@ -0,0 +1,25 @@
+Detect newly introduced gopls issues
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+func F() {
+	return
+	panic("unreachable")
+}
+
+-- .commit --
+Add file with gopls issues
+
+-- .run_test --
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:5:2-22: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/gopls_line_number_changed.txtar b/claudetool/testdata/gopls_line_number_changed.txtar
new file mode 100644
index 0000000..c7eeb37
--- /dev/null
+++ b/claudetool/testdata/gopls_line_number_changed.txtar
@@ -0,0 +1,26 @@
+Pre-existing gopls issues with line number changes should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+// Change the line number of the unused variable
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Add comments that change line numbers
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/gopls_new_issue_with_existing.txtar b/claudetool/testdata/gopls_new_issue_with_existing.txtar
new file mode 100644
index 0000000..d47fb02
--- /dev/null
+++ b/claudetool/testdata/gopls_new_issue_with_existing.txtar
@@ -0,0 +1,36 @@
+Only new gopls issues should be reported when existing issues are present
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+func G() {
+	panic("i went down down down")
+	panic("burning ring of fire")
+}
+
+-- .commit --
+Add a new function with a new gopls issue
+
+-- .run_test --
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:9:2-31: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/mark_test_skipped.txtar b/claudetool/testdata/mark_test_skipped.txtar
new file mode 100644
index 0000000..bf887e3
--- /dev/null
+++ b/claudetool/testdata/mark_test_skipped.txtar
@@ -0,0 +1,32 @@
+Marking a passing test as skipped test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Skip test
+
+-- .run_test --
+Test regressions detected between initial commit (851ba0628e0e2d93e27e168d6af336f3e4d375c7) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/multi_commit_review.txtar b/claudetool/testdata/multi_commit_review.txtar
new file mode 100644
index 0000000..2286dee
--- /dev/null
+++ b/claudetool/testdata/multi_commit_review.txtar
@@ -0,0 +1,51 @@
+Test that all issues across multiple commits are reported
+
+-- a.go --
+package main
+
+-- .commit --
+Initial commit
+
+-- b.go --
+package main
+
+import "sync"
+
+var x sync.Mutex
+var y = x
+
+-- .commit --
+Add another file
+
+-- c_test.go --
+package main
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Add a failing test
+
+-- d.go --
+package main
+
+-- .commit --
+Add yet another file
+
+-- .run_test --
+Test regressions detected between initial commit (a4fd3c2166b35e02a2cde466880e45aea6e6212e) and HEAD:
+
+1: sketch.dev.TestX: New test is failing
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/b.go:6:9-10: variable declaration copies lock value to y: sync.Mutex
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/multiple_format_issues.txtar b/claudetool/testdata/multiple_format_issues.txtar
new file mode 100644
index 0000000..0024a48
--- /dev/null
+++ b/claudetool/testdata/multiple_format_issues.txtar
@@ -0,0 +1,21 @@
+Test that formatting issues are not reported if pre-existing, per-file
+
+-- main.go --
+	package main
+
+-- .commit --
+Initial commit
+
+-- main.go --
+	package main
+
+	import "fmt"
+
+-- file1.go --
+	package main
+
+-- .commit --
+Add multiple files with formatting issues
+
+-- .run_autoformat --
+/PATH/TO/REPO/file1.go
diff --git a/claudetool/testdata/new_test_build_error.txtar b/claudetool/testdata/new_test_build_error.txtar
new file mode 100644
index 0000000..2fe1d34
--- /dev/null
+++ b/claudetool/testdata/new_test_build_error.txtar
@@ -0,0 +1,27 @@
+Adding a new test with build errors is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Add test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/no_fmt_autogenerated.txtar b/claudetool/testdata/no_fmt_autogenerated.txtar
new file mode 100644
index 0000000..aab3f57
--- /dev/null
+++ b/claudetool/testdata/no_fmt_autogenerated.txtar
@@ -0,0 +1,22 @@
+Test that autogenerated files do not get formatted
+
+-- .commit --
+Initial commit
+
+-- generated.go --
+// Code generated DO NOT EDIT.
+
+package main
+
+import (
+_ "fmt"
+)
+
+-- manual.go --
+	package main
+
+-- .commit --
+Add generated and manual files with poor formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/manual.go
diff --git a/claudetool/testdata/no_tests.txtar b/claudetool/testdata/no_tests.txtar
new file mode 100644
index 0000000..ceeb2a5
--- /dev/null
+++ b/claudetool/testdata/no_tests.txtar
@@ -0,0 +1,19 @@
+Going from no tests to no tests is not a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- other.go --
+package p
+
+func F() {
+}
+
+-- .commit --
+Add func
+
+-- .run_test --
+OK
diff --git a/claudetool/testdata/no_tests_to_failing_tests.txtar b/claudetool/testdata/no_tests_to_failing_tests.txtar
new file mode 100644
index 0000000..b0cc956
--- /dev/null
+++ b/claudetool/testdata/no_tests_to_failing_tests.txtar
@@ -0,0 +1,27 @@
+Adding failing tests is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Add failing test
+
+-- .run_test --
+Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
+
+1: sketch.dev.TestP: New test is failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_build_error.txtar b/claudetool/testdata/passing_to_build_error.txtar
new file mode 100644
index 0000000..9558862
--- /dev/null
+++ b/claudetool/testdata/passing_to_build_error.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Break test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_failing.txtar b/claudetool/testdata/passing_to_failing.txtar
new file mode 100644
index 0000000..31a0556
--- /dev/null
+++ b/claudetool/testdata/passing_to_failing.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/passing_to_failing_subdir.txtar b/claudetool/testdata/passing_to_failing_subdir.txtar
new file mode 100644
index 0000000..7649554
--- /dev/null
+++ b/claudetool/testdata/passing_to_failing_subdir.txtar
@@ -0,0 +1,32 @@
+Going from a passing test to a failing test in a subdir is a regression
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+Test regressions detected between initial commit (8c6c71d0beac5e27b87dfc3f2fd9d274b62c3d3a) and HEAD:
+
+1: sketch.dev/foo.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/skipped_to_build_error.txtar b/claudetool/testdata/skipped_to_build_error.txtar
new file mode 100644
index 0000000..f648572
--- /dev/null
+++ b/claudetool/testdata/skipped_to_build_error.txtar
@@ -0,0 +1,33 @@
+Going from a skipped test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Change skipped test to test with build error
+
+-- .run_test --
+Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/skipped_to_failing.txtar b/claudetool/testdata/skipped_to_failing.txtar
new file mode 100644
index 0000000..50348d2
--- /dev/null
+++ b/claudetool/testdata/skipped_to_failing.txtar
@@ -0,0 +1,33 @@
+Going from a skipped test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Change skipped test to failing test
+
+-- .run_test --
+Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
+
+1: sketch.dev.TestP: Was skipped, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/vet_error_test.txtar b/claudetool/testdata/vet_error_test.txtar
new file mode 100644
index 0000000..8c9dc64
--- /dev/null
+++ b/claudetool/testdata/vet_error_test.txtar
@@ -0,0 +1,36 @@
+Check that vet errors are detected in a test run
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+import (
+	"fmt"
+)
+
+func F()  {
+	fmt.Printf("Not a string: %s\n", 10)
+}
+
+-- .commit --
+Add vet error
+
+-- .run_test --
+Test regressions detected between initial commit (656e4d573cb77a30ebce29ebb974533ece282946) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:8:2-38: fmt.Printf format %s has arg 10 of wrong type int
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/teststatus_string.go b/claudetool/teststatus_string.go
new file mode 100644
index 0000000..a5b159b
--- /dev/null
+++ b/claudetool/teststatus_string.go
@@ -0,0 +1,28 @@
+// Code generated by "stringer -type=testStatus -trimprefix=testStatus"; DO NOT EDIT.
+
+package claudetool
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[testStatusUnknown-0]
+	_ = x[testStatusPass-1]
+	_ = x[testStatusFail-2]
+	_ = x[testStatusBuildFail-3]
+	_ = x[testStatusSkip-4]
+	_ = x[testStatusNoTests-5]
+}
+
+const _testStatus_name = "UnknownPassFailBuildFailSkipNoTests"
+
+var _testStatus_index = [...]uint8{0, 7, 11, 15, 24, 28, 35}
+
+func (i testStatus) String() string {
+	if i < 0 || i >= testStatus(len(_testStatus_index)-1) {
+		return "testStatus(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _testStatus_name[_testStatus_index[i]:_testStatus_index[i+1]]
+}