claudetool/codereview: new package extracted from claudetool

Co-Authored-By: sketch <hello@sketch.dev>
diff --git a/claudetool/codereview/codereview.go b/claudetool/codereview/codereview.go
new file mode 100644
index 0000000..bf118ee
--- /dev/null
+++ b/claudetool/codereview/codereview.go
@@ -0,0 +1,356 @@
+package codereview
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"sketch.dev/claudetool"
+)
+
+// A CodeReviewer manages quality checks.
+type CodeReviewer struct {
+	repoRoot        string
+	initialCommit   string
+	initialStatus   []fileStatus // git status of files at initial commit, absolute paths
+	reviewed        []string     // history of all commits which have been reviewed
+	initialWorktree string       // git worktree at initial commit, absolute path
+	llmReview       bool         // enables a subagent LLM review
+}
+
+const (
+	NoLLMReview = false
+	DoLLMReview = true
+)
+
+func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string, llmReview bool) (*CodeReviewer, error) {
+	r := &CodeReviewer{
+		repoRoot:      repoRoot,
+		initialCommit: initialCommit,
+		llmReview:     llmReview,
+	}
+	if r.repoRoot == "" {
+		return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
+	}
+	if r.initialCommit == "" {
+		return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
+	}
+	// Confirm that root is in fact the git repo root.
+	root, err := claudetool.FindRepoRoot(r.repoRoot)
+	if err != nil {
+		return nil, err
+	}
+	if root != r.repoRoot {
+		return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
+	}
+
+	// Get an initial list of dirty and untracked files.
+	// We'll filter them out later when deciding whether the worktree is clean.
+	status, err := r.repoStatus(ctx)
+	if err != nil {
+		return nil, err
+	}
+	r.initialStatus = status
+	return r, nil
+}
+
+// Autoformat formats all files changed in HEAD.
+// It returns a list of all files that were formatted.
+// It is best-effort only.
+func (r *CodeReviewer) Autoformat(ctx context.Context) []string {
+	// Refuse to format if HEAD == r.InitialCommit
+	head, err := r.CurrentCommit(ctx)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
+		return nil
+	}
+	parent, err := r.ResolveCommit(ctx, "HEAD^1")
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
+		return nil
+	}
+	if head == r.initialCommit {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format because HEAD == InitialCommit")
+		return nil
+	}
+	// Retrieve a list of all files changed
+	// TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
+	changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
+		return nil
+	}
+
+	// General strategy: For all changed files,
+	// run the strictest formatter that passes on the original version.
+	// TODO: add non-Go formatters?
+	// TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
+	var fmtFiles []string
+	for _, file := range changedFiles {
+		if !strings.HasSuffix(file, ".go") {
+			continue
+		}
+		fileStatus, err := r.gitFileStatus(ctx, file)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
+			continue
+		}
+		if fileStatus == "D" { // deleted, nothing to format
+			continue
+		}
+		code, err := r.getFileContentAtCommit(ctx, file, head)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
+			continue
+		}
+		if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
+			continue
+		}
+		onDisk, err := os.ReadFile(file)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
+			continue
+		}
+		if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
+			continue
+		}
+		var formatterToUse string
+		if fileStatus == "A" {
+			formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
+		} else {
+			prev, err := r.getFileContentAtCommit(ctx, file, parent)
+			if err != nil {
+				slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
+				continue
+			}
+			formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
+		}
+
+		// Apply the chosen formatter to the current file
+		newCode := r.runFormatter(ctx, formatterToUse, code)
+		if newCode == nil { // no changes made
+			continue
+		}
+		// write to disk
+		if err := os.WriteFile(file, newCode, 0o600); err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
+			continue
+		}
+		fmtFiles = append(fmtFiles, file)
+	}
+	return fmtFiles
+}
+
+// RequireNormalGitState checks that the git repo state is pretty normal.
+func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
+	rebaseDirs := []string{"rebase-merge", "rebase-apply"}
+	for _, dir := range rebaseDirs {
+		_, err := os.Stat(filepath.Join(r.repoRoot, dir))
+		if err == nil {
+			return fmt.Errorf("git repo is not clean: rebase in progress")
+		}
+	}
+	filesReason := map[string]string{
+		"MERGE_HEAD":       "merge is in progress",
+		"CHERRY_PICK_HEAD": "cherry-pick is in progress",
+		"REVERT_HEAD":      "revert is in progress",
+		"BISECT_LOG":       "bisect is in progress",
+	}
+	for file, reason := range filesReason {
+		_, err := os.Stat(filepath.Join(r.repoRoot, file))
+		if err == nil {
+			return fmt.Errorf("git repo is not clean: %s", reason)
+		}
+	}
+	return nil
+}
+
+func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
+	// Check that there are no uncommitted changes, whether staged or not.
+	// (Changes in r.initialStatus are OK, no other changes are.)
+	statuses, err := r.repoStatus(ctx)
+	if err != nil {
+		return fmt.Errorf("unable to get repo status: %w", err)
+	}
+	uncommitted := new(strings.Builder)
+	for _, status := range statuses {
+		if !r.initialStatusesContainFile(status.Path) {
+			fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
+		}
+	}
+	if uncommitted.Len() > 0 {
+		return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
+	}
+	return nil
+}
+
+func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
+	for _, s := range r.initialStatus {
+		if s.Path == file {
+			return true
+		}
+	}
+	return false
+}
+
+type fileStatus struct {
+	Path      string
+	RawStatus string // always 2 characters
+}
+
+func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
+	// Run git status --porcelain, split into lines
+	cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
+	}
+	var statuses []fileStatus
+	for line := range strings.Lines(string(out)) {
+		if len(line) == 0 {
+			continue
+		}
+		if len(line) < 3 {
+			return nil, fmt.Errorf("invalid status line: %s", line)
+		}
+		path := line[3:]
+		status := line[:2]
+		absPath := r.absPath(path)
+		statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
+	}
+	return statuses, nil
+}
+
+// CurrentCommit retrieves the current git commit hash
+func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
+	return r.ResolveCommit(ctx, "HEAD")
+}
+
+func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
+	}
+	return strings.TrimSpace(string(out)), nil
+}
+
+func (r *CodeReviewer) absPath(relPath string) string {
+	return filepath.Clean(filepath.Join(r.repoRoot, relPath))
+}
+
+// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
+func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
+	}
+	status := strings.TrimSpace(string(out))
+	if status == "" {
+		return "", fmt.Errorf("no status found for file: %s", file)
+	}
+	return string(status[0]), nil
+}
+
+// getFileContentAtCommit retrieves file content at a specific commit
+func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
+	relFile, err := filepath.Rel(r.repoRoot, file)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
+		file = relFile
+	}
+	cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
+	}
+	return out, nil
+}
+
+// runFormatter runs the specified formatter on a file and returns the results.
+// A nil result indicates that the file is unchanged, or that an error occurred.
+func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
+	if formatter == "" {
+		return nil // no formatter
+	}
+	// Run the formatter and capture the output
+	cmd := exec.CommandContext(ctx, formatter)
+	cmd.Dir = r.repoRoot
+	cmd.Stdin = bytes.NewReader(content)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		// probably a parse error, err on the side of safety
+		return nil
+	}
+	if bytes.Equal(content, out) {
+		return nil // no changes
+	}
+	return out
+}
+
+// formatterWouldChange reports whether a formatter would make changes to the content.
+// If the contents are invalid, it returns false.
+// It works by piping the content to the formatter with the -l flag.
+func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
+	cmd := exec.CommandContext(ctx, formatter, "-l")
+	cmd.Dir = r.repoRoot
+	cmd.Stdin = bytes.NewReader(content)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		// probably a parse error, err on the side of safety
+		return false
+	}
+
+	// If the output is empty, the file passes the formatter
+	// If the output contains "<standard input>", the file would be changed
+	return len(bytes.TrimSpace(out)) > 0
+}
+
+// pickFormatter picks a formatter to use for code.
+// If something goes wrong, it recommends no formatter (empty string).
+func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
+	// Test each formatter from strictest to least strict.
+	// Keep the first one that doesn't make changes.
+	formatters := []string{"gofumpt", "goimports", "gofmt"}
+	for _, formatter := range formatters {
+		if r.formatterWouldChange(ctx, formatter, code) {
+			continue
+		}
+		return formatter
+	}
+	return "" // no safe formatter found
+}
+
+// changedFiles retrieves a list of all files changed between two commits
+func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
+	}
+	var files []string
+	for line := range strings.Lines(string(out)) {
+		line = strings.TrimSpace(line)
+		if len(line) == 0 {
+			continue
+		}
+		path := r.absPath(line)
+		if r.initialStatusesContainFile(path) {
+			continue
+		}
+		files = append(files, path)
+	}
+	return files, nil
+}
diff --git a/claudetool/codereview/codereview_test.go b/claudetool/codereview/codereview_test.go
new file mode 100644
index 0000000..29e05ec
--- /dev/null
+++ b/claudetool/codereview/codereview_test.go
@@ -0,0 +1,294 @@
+package codereview
+
+import (
+	"bytes"
+	"cmp"
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"slices"
+	"strings"
+	"testing"
+
+	"golang.org/x/tools/txtar"
+)
+
+// updateTests is set to true when the -update flag is used.
+// This will update the expected test results instead of failing tests.
+var updateTests = flag.Bool("update", false, "update expected test results instead of failing tests")
+
+// TestCodereviewDifferential runs all the end-to-end tests for the codereview and differential packages.
+// Each test is defined in a .txtar file in the testdata directory.
+func TestCodereviewDifferential(t *testing.T) {
+	entries, err := os.ReadDir("testdata")
+	if err != nil {
+		t.Fatalf("failed to read testdata directory: %v", err)
+	}
+	for _, entry := range entries {
+		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".txtar") {
+			continue
+		}
+		testPath := filepath.Join("testdata", entry.Name())
+		testName := strings.TrimSuffix(entry.Name(), ".txtar")
+		t.Run(testName, func(t *testing.T) {
+			t.Parallel()
+			runE2ETest(t, testPath, *updateTests)
+		})
+	}
+}
+
+// runE2ETest executes a single end-to-end test from a .txtar file.
+func runE2ETest(t *testing.T, testPath string, update bool) {
+	orig, err := os.ReadFile(testPath)
+	if err != nil {
+		t.Fatalf("failed to read test file %s: %v", testPath, err)
+	}
+	archive, err := txtar.ParseFile(testPath)
+	if err != nil {
+		t.Fatalf("failed to parse txtar file %s: %v", testPath, err)
+	}
+
+	tmpDir := t.TempDir()
+	// resolve temp dir path so that we can canonicalize/normalize it later
+	tmpDir = resolveRealPath(tmpDir)
+
+	if err := initGoModule(tmpDir); err != nil {
+		t.Fatalf("failed to initialize Go module: %v", err)
+	}
+	if err := initGitRepo(tmpDir); err != nil {
+		t.Fatalf("failed to initialize git repository: %v", err)
+	}
+	if err := processTestFiles(t, archive, tmpDir, update); err != nil {
+		t.Fatalf("error processing test files: %v", err)
+	}
+
+	// If we're updating, write back the modified archive to the file
+	if update {
+		updatedContent := txtar.Format(archive)
+		// only write back changes, avoids git status churn
+		if !bytes.Equal(orig, updatedContent) {
+			if err := os.WriteFile(testPath, updatedContent, 0o644); err != nil {
+				t.Errorf("Failed to update test file %s: %v", testPath, err)
+			}
+		}
+	}
+}
+
+func gitCommitEnv() []string {
+	return append(os.Environ(),
+		"GIT_AUTHOR_NAME=Test Author",
+		"GIT_AUTHOR_EMAIL=test@example.com",
+		"GIT_COMMITTER_NAME=Test Committer",
+		"GIT_COMMITTER_EMAIL=test@example.com",
+		"GIT_AUTHOR_DATE=2025-01-01T00:00:00Z",
+		"GIT_COMMITTER_DATE=2025-01-01T00:00:00Z",
+	)
+}
+
+// initGitRepo initializes a new git repository in the specified directory.
+func initGitRepo(dir string) error {
+	cmd := exec.Command("git", "init")
+	cmd.Dir = dir
+	err := cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error initializing git repository: %w", err)
+	}
+	// create a single commit out of everything there now
+	cmd = exec.Command("git", "add", ".")
+	cmd.Dir = dir
+	err = cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error staging files: %w", err)
+	}
+	cmd = exec.Command("git", "commit", "-m", "create repo")
+	cmd.Dir = dir
+	cmd.Env = gitCommitEnv()
+	err = cmd.Run()
+	if err != nil {
+		return fmt.Errorf("error making initial commit: %w", err)
+	}
+	return nil
+}
+
+// processTestFiles processes the files in the txtar archive in sequence.
+func processTestFiles(t *testing.T, archive *txtar.Archive, dir string, update bool) error {
+	var initialCommit string
+	filesForNextCommit := make(map[string]bool)
+
+	for i, file := range archive.Files {
+		switch file.Name {
+		case ".commit":
+			commit, err := makeGitCommit(dir, string(file.Data), filesForNextCommit)
+			if err != nil {
+				return fmt.Errorf("error making git commit: %w", err)
+			}
+			clear(filesForNextCommit)
+			initialCommit = cmp.Or(initialCommit, commit)
+			// fmt.Println("initial commit:", initialCommit)
+			// cmd := exec.Command("git", "log")
+			// cmd.Dir = dir
+			// cmd.Stdout = os.Stdout
+			// cmd.Run()
+
+		case ".run_test":
+			got, err := runDifferentialTest(dir, initialCommit)
+			if err != nil {
+				return fmt.Errorf("error running differential test: %w", err)
+			}
+			want := string(file.Data)
+
+			commitCleaner := strings.NewReplacer(initialCommit, "INITIAL_COMMIT_HASH")
+			got = commitCleaner.Replace(got)
+			want = commitCleaner.Replace(want)
+
+			if update {
+				archive.Files[i].Data = []byte(got)
+				break
+			}
+			if strings.TrimSpace(want) != strings.TrimSpace(got) {
+				t.Errorf("Results don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+			}
+
+		case ".run_autoformat":
+			if initialCommit == "" {
+				return fmt.Errorf("initial commit not set, cannot run autoformat")
+			}
+
+			got, err := runAutoformat(dir, initialCommit)
+			if err != nil {
+				return fmt.Errorf("error running autoformat: %w", err)
+			}
+			slices.Sort(got)
+
+			if update {
+				correct := strings.Join(got, "\n") + "\n"
+				archive.Files[i].Data = []byte(correct)
+				break
+			}
+
+			want := strings.Split(strings.TrimSpace(string(file.Data)), "\n")
+			if !slices.Equal(want, got) {
+				t.Errorf("Formatted files don't match.\nExpected:\n%s\n\nActual:\n%s", want, got)
+			}
+
+		default:
+			filePath := filepath.Join(dir, file.Name)
+			if err := os.MkdirAll(filepath.Dir(filePath), 0o700); err != nil {
+				return fmt.Errorf("error creating directory for %s: %w", file.Name, err)
+			}
+			data := file.Data
+			// Remove second trailing newline if present.
+			// An annoyance of the txtar format, messes with gofmt.
+			if bytes.HasSuffix(data, []byte("\n\n")) {
+				data = bytes.TrimSuffix(data, []byte("\n"))
+			}
+			if err := os.WriteFile(filePath, file.Data, 0o600); err != nil {
+				return fmt.Errorf("error writing file %s: %w", file.Name, err)
+			}
+			filesForNextCommit[file.Name] = true
+		}
+	}
+
+	return nil
+}
+
+// makeGitCommit commits the specified files with the given message.
+// Returns the commit hash.
+func makeGitCommit(dir, message string, files map[string]bool) (string, error) {
+	for file := range files {
+		cmd := exec.Command("git", "add", file)
+		cmd.Dir = dir
+		if err := cmd.Run(); err != nil {
+			return "", fmt.Errorf("error staging file %s: %w", file, err)
+		}
+	}
+	message = cmp.Or(message, "Test commit")
+
+	// Make the commit with fixed author, committer, and date for stable hashes
+	cmd := exec.Command("git", "commit", "--allow-empty", "-m", message)
+	cmd.Dir = dir
+	cmd.Env = gitCommitEnv()
+	if err := cmd.Run(); err != nil {
+		return "", fmt.Errorf("error making commit: %w", err)
+	}
+
+	// Get the commit hash
+	cmd = exec.Command("git", "rev-parse", "HEAD")
+	cmd.Dir = dir
+	out, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("error getting commit hash: %w", err)
+	}
+
+	return strings.TrimSpace(string(out)), nil
+}
+
+// runDifferentialTest runs the code review tool on the repository and returns the result.
+func runDifferentialTest(dir, initialCommit string) (string, error) {
+	if initialCommit == "" {
+		return "", fmt.Errorf("initial commit not set, cannot run differential test")
+	}
+
+	// Create a code reviewer for the repository
+	ctx := context.Background()
+	reviewer, err := NewCodeReviewer(ctx, dir, initialCommit, NoLLMReview)
+	if err != nil {
+		return "", fmt.Errorf("error creating code reviewer: %w", err)
+	}
+
+	// Run the code review
+	result, err := reviewer.Run(ctx, nil)
+	if err != nil {
+		return "", fmt.Errorf("error running code review: %w", err)
+	}
+
+	// Normalize paths in the result
+	normalized := normalizePaths(result, dir)
+	return normalized, nil
+}
+
+// normalizePaths replaces the temp directory paths with a standard placeholder
+func normalizePaths(result string, tempDir string) string {
+	return strings.ReplaceAll(result, tempDir, "/PATH/TO/REPO")
+}
+
+// initGoModule initializes a Go module in the specified directory.
+func initGoModule(dir string) error {
+	cmd := exec.Command("go", "mod", "init", "sketch.dev")
+	cmd.Dir = dir
+	return cmd.Run()
+}
+
+// runAutoformat runs the autoformat function on the repository and returns the list of formatted files.
+func runAutoformat(dir, initialCommit string) ([]string, error) {
+	if initialCommit == "" {
+		return nil, fmt.Errorf("initial commit not set, cannot run autoformat")
+	}
+	ctx := context.Background()
+	reviewer, err := NewCodeReviewer(ctx, dir, initialCommit, NoLLMReview)
+	if err != nil {
+		return nil, fmt.Errorf("error creating code reviewer: %w", err)
+	}
+	formattedFiles := reviewer.Autoformat(ctx)
+	normalizedFiles := make([]string, len(formattedFiles))
+	for i, file := range formattedFiles {
+		normalizedFiles[i] = normalizePaths(file, dir)
+	}
+	slices.Sort(normalizedFiles)
+	return normalizedFiles, nil
+}
+
+// resolveRealPath follows symlinks and returns the real path
+// This handles platform-specific behaviors like macOS's /private prefix
+func resolveRealPath(path string) string {
+	// Follow symlinks to get the real path
+	realPath, err := filepath.EvalSymlinks(path)
+	if err != nil {
+		// If we can't resolve symlinks, just return the original path
+		return path
+	}
+	return realPath
+}
diff --git a/claudetool/codereview/differential.go b/claudetool/codereview/differential.go
new file mode 100644
index 0000000..6aa8dfe
--- /dev/null
+++ b/claudetool/codereview/differential.go
@@ -0,0 +1,1040 @@
+package codereview
+
+import (
+	"bytes"
+	"cmp"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/fs"
+	"log/slog"
+	"maps"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"slices"
+	"strings"
+	"sync"
+	"time"
+
+	"golang.org/x/tools/go/packages"
+	"sketch.dev/llm"
+)
+
+// This file does differential quality analysis of a commit relative to a base commit.
+
+// Tool returns a tool spec for a CodeReview tool backed by r.
+func (r *CodeReviewer) Tool() *llm.Tool {
+	spec := &llm.Tool{
+		Name:        "codereview",
+		Description: `Run an automated code review.`,
+		// If you modify this, update the termui template for prettier rendering.
+		InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
+		Run:         r.Run,
+	}
+	return spec
+}
+
+func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
+	// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
+	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
+	if err := r.RequireNormalGitState(ctx); err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
+		return "", err
+	}
+	if err := r.RequireNoUncommittedChanges(ctx); err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
+		return "", err
+	}
+
+	// Check that the current commit is not the initial commit
+	currentCommit, err := r.CurrentCommit(ctx)
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
+		return "", err
+	}
+	if r.IsInitialCommit(currentCommit) {
+		slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
+		return "", fmt.Errorf("no new commits have been added, nothing to review")
+	}
+
+	// No matter what failures happen from here out, we will declare this to have been reviewed.
+	// This should help avoid the model getting blocked by a broken code review tool.
+	r.reviewed = append(r.reviewed, currentCommit)
+
+	changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
+		return "", err
+	}
+
+	// Prepare to analyze before/after for the impacted files.
+	// We use the current commit to determine what packages exist and are impacted.
+	// The packages in the initial commit may be different.
+	// Good enough for now.
+	// TODO: do better
+	allPkgs, err := r.packagesForFiles(ctx, changedFiles)
+	if err != nil {
+		// TODO: log and skip to stuff that doesn't require packages
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
+		return "", err
+	}
+	allPkgList := slices.Collect(maps.Keys(allPkgs))
+
+	var errorMessages []string // problems we want the model to address
+	var infoMessages []string  // info the model should consider
+
+	// Find potentially related files that should also be considered
+	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
+	relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
+	} else {
+		relatedMsg := r.formatRelatedFiles(relatedFiles)
+		if relatedMsg != "" {
+			infoMessages = append(infoMessages, relatedMsg)
+		}
+	}
+
+	testMsg, err := r.checkTests(ctx, allPkgList)
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
+		return "", err
+	}
+	if testMsg != "" {
+		errorMessages = append(errorMessages, testMsg)
+	}
+
+	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
+		return "", err
+	}
+	if goplsMsg != "" {
+		errorMessages = append(errorMessages, goplsMsg)
+	}
+
+	if r.llmReview {
+		llmComments, err := r.doLLMReview(ctx)
+		if err != nil {
+			// Log the error but don't fail the codereview if this check fails
+			slog.WarnContext(ctx, "CodeReviewer.Run: error doing LLM review", "err", err)
+		}
+		if llmComments != "" {
+			infoMessages = append(infoMessages, llmComments)
+		}
+	}
+
+	// NOTE: If you change this output format, update the corresponding UI parsing in:
+	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
+	buf := new(strings.Builder)
+	if len(infoMessages) > 0 {
+		buf.WriteString("# Info\n\n")
+		buf.WriteString(strings.Join(infoMessages, "\n\n"))
+		buf.WriteString("\n\n")
+	}
+	if len(errorMessages) > 0 {
+		buf.WriteString("# Errors\n\n")
+		buf.WriteString(strings.Join(errorMessages, "\n\n"))
+		buf.WriteString("\n\nPlease fix before proceeding.\n")
+	}
+	if buf.Len() == 0 {
+		buf.WriteString("OK")
+	}
+	return buf.String(), nil
+}
+
+func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
+	if r.initialWorktree != "" {
+		return nil
+	}
+	tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
+	if err != nil {
+		return err
+	}
+	worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
+	worktreeCmd.Dir = r.repoRoot
+	out, err := worktreeCmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
+	}
+	r.initialWorktree = tmpDir
+	return nil
+}
+
+func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
+	goTestArgs := []string{"test", "-json", "-v"}
+	goTestArgs = append(goTestArgs, pkgList...)
+
+	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
+	afterTestCmd.Dir = r.repoRoot
+	afterTestOut, _ := afterTestCmd.Output()
+	// unfortunately, we can't short-circuit here even if all tests pass,
+	// because we need to check for skipped tests.
+
+	err := r.initializeInitialCommitWorktree(ctx)
+	if err != nil {
+		return "", err
+	}
+
+	beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
+	beforeTestCmd.Dir = r.initialWorktree
+	beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
+
+	// Parse the jsonl test results
+	beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
+	if beforeParseErr != nil {
+		return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
+	}
+	afterResults, afterParseErr := parseTestResults(afterTestOut)
+	if afterParseErr != nil {
+		return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
+	}
+	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
+	if err != nil {
+		return "", fmt.Errorf("failed to compare test results: %w", err)
+	}
+	// TODO: better output formatting?
+	res := r.formatTestRegressions(testRegressions)
+	return res, nil
+}
+
+// GoplsIssue represents a single issue reported by gopls check
+type GoplsIssue struct {
+	Position string // File position in format "file:line:col-range"
+	Message  string // Description of the issue
+}
+
+// checkGopls runs gopls check on the provided files in both the current and initial state,
+// compares the results, and reports any new issues introduced in the current state.
+func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
+	if len(changedFiles) == 0 {
+		return "", nil // no files to check
+	}
+
+	// Filter out non-Go files as gopls only works on Go files
+	// and verify they still exist (not deleted)
+	var goFiles []string
+	for _, file := range changedFiles {
+		if !strings.HasSuffix(file, ".go") {
+			continue // not a Go file
+		}
+
+		// Check if the file still exists (not deleted)
+		if _, err := os.Stat(file); os.IsNotExist(err) {
+			continue // file doesn't exist anymore (deleted)
+		}
+
+		goFiles = append(goFiles, file)
+	}
+
+	if len(goFiles) == 0 {
+		return "", nil // no Go files to check
+	}
+
+	// Run gopls check on the current state
+	goplsArgs := append([]string{"check"}, goFiles...)
+
+	afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
+	afterGoplsCmd.Dir = r.repoRoot
+	afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
+	if err != nil {
+		// Check if the output looks like real gopls issues or if it's just error output
+		if !looksLikeGoplsIssues(afterGoplsOut) {
+			slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
+			return "", nil // Skip rather than failing the entire code review
+		}
+		// Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
+	}
+
+	// Parse the output
+	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
+
+	// If no issues were found, we're done
+	if len(afterIssues) == 0 {
+		return "", nil
+	}
+
+	// Gopls detected issues in the current state, check if they existed in the initial state
+	initErr := r.initializeInitialCommitWorktree(ctx)
+	if initErr != nil {
+		return "", err
+	}
+
+	// For each file that exists in the initial commit, run gopls check
+	var initialFilesToCheck []string
+	for _, file := range goFiles {
+		// Get relative path for git operations
+		relFile, err := filepath.Rel(r.repoRoot, file)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
+			continue
+		}
+
+		// Check if the file exists in the initial commit
+		checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
+		checkCmd.Dir = r.repoRoot
+		if err := checkCmd.Run(); err == nil {
+			// File exists in initial commit
+			initialFilePath := filepath.Join(r.initialWorktree, relFile)
+			initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
+		}
+	}
+
+	// Run gopls check on the files that existed in the initial commit
+	var beforeIssues []GoplsIssue
+	if len(initialFilesToCheck) > 0 {
+		beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
+		beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
+		beforeGoplsCmd.Dir = r.initialWorktree
+		beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
+		if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
+			// If gopls fails to run properly on the initial commit, log a warning and continue
+			// with empty before issues - this will be conservative and report more issues
+			slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
+		} else {
+			beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
+		}
+	}
+
+	// Find new issues that weren't present in the initial state
+	goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
+	if len(goplsRegressions) == 0 {
+		return "", nil // no new issues
+	}
+
+	// Format the results
+	return r.formatGoplsRegressions(goplsRegressions), nil
+}
+
+// parseGoplsOutput parses the text output from gopls check
+// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
+func parseGoplsOutput(root string, output []byte) []GoplsIssue {
+	var issues []GoplsIssue
+	for line := range strings.Lines(string(output)) {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+
+		// Skip lines that look like error messages rather than gopls issues
+		if strings.HasPrefix(line, "Error:") ||
+			strings.HasPrefix(line, "Failed:") ||
+			strings.HasPrefix(line, "Warning:") ||
+			strings.HasPrefix(line, "gopls:") {
+			continue
+		}
+
+		// Find the first colon that separates the file path from the line number
+		firstColonIdx := strings.Index(line, ":")
+		if firstColonIdx < 0 {
+			continue // Invalid format
+		}
+
+		// Verify the part before the first colon looks like a file path
+		potentialPath := line[:firstColonIdx]
+		if !strings.HasSuffix(potentialPath, ".go") {
+			continue // Not a Go file path
+		}
+
+		// Find the position of the first message separator ': '
+		// This separates the position info from the message
+		messageStart := strings.Index(line, ": ")
+		if messageStart < 0 || messageStart <= firstColonIdx {
+			continue // Invalid format
+		}
+
+		// Extract position and message
+		position := line[:messageStart]
+		rel, err := filepath.Rel(root, position)
+		if err == nil {
+			position = rel
+		}
+		message := line[messageStart+2:] // Skip the ': ' separator
+
+		// Verify position has the expected format (at least 2 colons for line:col)
+		colonCount := strings.Count(position, ":")
+		if colonCount < 2 {
+			continue // Not enough position information
+		}
+
+		issues = append(issues, GoplsIssue{
+			Position: position,
+			Message:  message,
+		})
+	}
+
+	return issues
+}
+
+// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
+// rather than error messages about gopls itself failing
+func looksLikeGoplsIssues(output []byte) bool {
+	// If output is empty, it's not valid issues
+	if len(output) == 0 {
+		return false
+	}
+
+	// Check if output has at least one line that looks like a gopls issue
+	// A gopls issue looks like: '/path/to/file.go:123:45-67: message'
+	for line := range strings.Lines(string(output)) {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+
+		// A gopls issue has at least two colons (file path, line number, column)
+		// and contains a colon followed by a space (separating position from message)
+		colonCount := strings.Count(line, ":")
+		hasSeparator := strings.Contains(line, ": ")
+
+		if colonCount >= 2 && hasSeparator {
+			// Check if it starts with a likely file path (ending in .go)
+			parts := strings.SplitN(line, ":", 2)
+			if strings.HasSuffix(parts[0], ".go") {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// normalizeGoplsPosition extracts just the file path from a position string
+func normalizeGoplsPosition(position string) string {
+	// Extract just the file path by taking everything before the first colon
+	parts := strings.Split(position, ":")
+	if len(parts) < 1 {
+		return position
+	}
+	return parts[0]
+}
+
+// findGoplsRegressions identifies gopls issues that are new in the after state
+func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
+	var regressions []GoplsIssue
+
+	// Build map of before issues for easier lookup
+	beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
+	for _, issue := range before {
+		file := normalizeGoplsPosition(issue.Position)
+		if _, exists := beforeIssueMap[file]; !exists {
+			beforeIssueMap[file] = make(map[string]bool)
+		}
+		// Store both the exact message and the general issue type for fuzzy matching
+		beforeIssueMap[file][issue.Message] = true
+
+		// Extract the general issue type (everything before the first ':' in the message)
+		generalIssue := issue.Message
+		if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
+			generalIssue = issue.Message[:colonIdx]
+		}
+		beforeIssueMap[file][generalIssue] = true
+	}
+
+	// Check each after issue to see if it's new
+	for _, afterIssue := range after {
+		file := normalizeGoplsPosition(afterIssue.Position)
+		isNew := true
+
+		if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
+			// Check for exact message match
+			if fileIssues[afterIssue.Message] {
+				isNew = false
+			} else {
+				// Check for general issue type match
+				generalIssue := afterIssue.Message
+				if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
+					generalIssue = afterIssue.Message[:colonIdx]
+				}
+				if fileIssues[generalIssue] {
+					isNew = false
+				}
+			}
+		}
+
+		if isNew {
+			regressions = append(regressions, afterIssue)
+		}
+	}
+
+	// Sort regressions for deterministic output
+	slices.SortFunc(regressions, func(a, b GoplsIssue) int {
+		return strings.Compare(a.Position, b.Position)
+	})
+
+	return regressions
+}
+
+// formatGoplsRegressions generates a human-readable summary of gopls check regressions
+func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
+	if len(regressions) == 0 {
+		return ""
+	}
+
+	var sb strings.Builder
+	sb.WriteString("Gopls check issues detected:\n\n")
+
+	// Format each issue
+	for i, issue := range regressions {
+		sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
+	}
+
+	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
+	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
+	return sb.String()
+}
+
+func (r *CodeReviewer) HasReviewed(commit string) bool {
+	return slices.Contains(r.reviewed, commit)
+}
+
+func (r *CodeReviewer) IsInitialCommit(commit string) bool {
+	return commit == r.initialCommit
+}
+
+// packagesForFiles returns maps of packages related to the given files:
+// 1. directPkgs: packages that directly contain the changed files
+// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
+// It may include false positives.
+// Files must be absolute paths!
+func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
+	for _, f := range files {
+		if !filepath.IsAbs(f) {
+			return nil, fmt.Errorf("path %q is not absolute", f)
+		}
+	}
+	cfg := &packages.Config{
+		Mode:    packages.LoadImports | packages.NeedEmbedFiles,
+		Context: ctx,
+		// Logf: func(msg string, args ...any) {
+		// 	slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
+		// },
+		// TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
+		// We can cross that bridge when we get there.
+		Dir:   r.repoRoot,
+		Tests: true,
+	}
+	universe, err := packages.Load(cfg, "./...")
+	if err != nil {
+		return nil, err
+	}
+	// Identify packages that directly contain the changed files
+	directPkgs := make(map[string]*packages.Package) // import path -> package
+	for _, pkg := range universe {
+		pkgFiles := allFiles(pkg)
+		for _, file := range files {
+			if pkgFiles[file] {
+				// prefer test packages, as they contain strictly more files (right?)
+				prev := directPkgs[pkg.PkgPath]
+				if prev == nil || prev.ForTest == "" {
+					directPkgs[pkg.PkgPath] = pkg
+				}
+			}
+		}
+	}
+
+	allPkgs = maps.Clone(directPkgs)
+
+	// Add packages that depend on the direct packages
+	addDependentPackages(universe, allPkgs)
+	return allPkgs, nil
+}
+
+// allFiles returns all files that might be referenced by the package.
+// It may contain false positives.
+func allFiles(p *packages.Package) map[string]bool {
+	files := make(map[string]bool)
+	// Add files from package info
+	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
+	for _, extra := range add {
+		for _, file := range extra {
+			files[file] = true
+		}
+	}
+	// Add files from testdata directory
+	testdataDir := filepath.Join(p.Dir, "testdata")
+	if _, err := os.Stat(testdataDir); err == nil {
+		fsys := os.DirFS(p.Dir)
+		fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
+			if err == nil && !d.IsDir() {
+				files[filepath.Join(p.Dir, path)] = true
+			}
+			return nil
+		})
+	}
+	return files
+}
+
+// addDependentPackages adds to pkgs all packages from universe
+// that directly or indirectly depend on any package already in pkgs.
+func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
+	for {
+		changed := false
+		for _, p := range universe {
+			if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
+				// skip test packages
+				continue
+			}
+			if _, ok := pkgs[p.PkgPath]; ok {
+				// already in pkgs
+				continue
+			}
+			for importPath := range p.Imports {
+				if _, ok := pkgs[importPath]; ok {
+					// imports a package dependent on pkgs, add it
+					pkgs[p.PkgPath] = p
+					changed = true
+					break
+				}
+			}
+		}
+		if !changed {
+			break
+		}
+	}
+}
+
+// testJSON is a union of BuildEvent and TestEvent
+type testJSON struct {
+	// TestEvent only:
+	// The Time field holds the time the event happened. It is conventionally omitted
+	// for cached test results.
+	Time time.Time `json:"Time"`
+	// BuildEvent only:
+	// The ImportPath field gives the package ID of the package being built.
+	// This matches the Package.ImportPath field of go list -json and the
+	// TestEvent.FailedBuild field of go test -json. Note that it does not
+	// match TestEvent.Package.
+	ImportPath string `json:"ImportPath"` // BuildEvent only
+	// TestEvent only:
+	// The Package field, if present, specifies the package being tested. When the
+	// go command runs parallel tests in -json mode, events from different tests are
+	// interlaced; the Package field allows readers to separate them.
+	Package string `json:"Package"`
+	// Action is used in both BuildEvent and TestEvent.
+	// It is the key to distinguishing between them.
+	// BuildEvent:
+	// build-output or build-fail
+	// TestEvent:
+	// start, run, pause, cont, pass, bench, fail, output, skip
+	Action string `json:"Action"`
+	// TestEvent only:
+	// The Test field, if present, specifies the test, example, or benchmark function
+	// that caused the event. Events for the overall package test do not set Test.
+	Test string `json:"Test"`
+	// TestEvent only:
+	// The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
+	// for the specific test or the overall package test that passed or failed.
+	Elapsed float64
+	// TestEvent:
+	// The Output field is set for Action == "output" and is a portion of the
+	// test's output (standard output and standard error merged together). The
+	// output is unmodified except that invalid UTF-8 output from a test is coerced
+	// into valid UTF-8 by use of replacement characters. With that one exception,
+	// the concatenation of the Output fields of all output events is the exact output
+	// of the test execution.
+	// BuildEvent:
+	// The Output field is set for Action == "build-output" and is a portion of
+	// the build's output. The concatenation of the Output fields of all output
+	// events is the exact output of the build. A single event may contain one
+	// or more lines of output and there may be more than one output event for
+	// a given ImportPath. This matches the definition of the TestEvent.Output
+	// field produced by go test -json.
+	Output string `json:"Output"`
+	// TestEvent only:
+	// The FailedBuild field is set for Action == "fail" if the test failure was caused
+	// by a build failure. It contains the package ID of the package that failed to
+	// build. This matches the ImportPath field of the "go list" output, as well as the
+	// BuildEvent.ImportPath field as emitted by "go build -json".
+	FailedBuild string `json:"FailedBuild"`
+}
+
+// parseTestResults converts test output in JSONL format into a slice of testJSON objects
+func parseTestResults(testOutput []byte) ([]testJSON, error) {
+	var results []testJSON
+	dec := json.NewDecoder(bytes.NewReader(testOutput))
+	for {
+		var event testJSON
+		if err := dec.Decode(&event); err != nil {
+			if err == io.EOF {
+				break
+			}
+			return nil, err
+		}
+		results = append(results, event)
+	}
+	return results, nil
+}
+
+// testStatus represents the status of a test in a given commit
+type testStatus int
+
+//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
+const (
+	testStatusUnknown testStatus = iota
+	testStatusPass
+	testStatusFail
+	testStatusBuildFail
+	testStatusSkip
+	testStatusNoTests // no tests exist for this package
+)
+
+// testRegression represents a test that regressed between commits
+type testRegression struct {
+	Package      string
+	Test         string // empty for package tests
+	BeforeStatus testStatus
+	AfterStatus  testStatus
+	Output       string // failure output in the after state
+}
+
+func (r *testRegression) Source() string {
+	if r.Test == "" {
+		return r.Package
+	}
+	return fmt.Sprintf("%s.%s", r.Package, r.Test)
+}
+
+type packageResult struct {
+	Status     testStatus            // overall status for the package
+	TestStatus map[string]testStatus // name -> status
+	TestOutput map[string][]string   // name -> output parts
+}
+
+// collectTestStatuses processes a slice of test events and returns rich status information
+func collectTestStatuses(results []testJSON) map[string]*packageResult {
+	m := make(map[string]*packageResult)
+
+	for _, event := range results {
+		pkg := event.Package
+		p, ok := m[pkg]
+		if !ok {
+			p = new(packageResult)
+			p.TestStatus = make(map[string]testStatus)
+			p.TestOutput = make(map[string][]string)
+			m[pkg] = p
+		}
+
+		switch event.Action {
+		case "output":
+			p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
+			continue
+		case "pass":
+			if event.Test == "" {
+				p.Status = testStatusPass
+			} else {
+				p.TestStatus[event.Test] = testStatusPass
+			}
+		case "fail":
+			if event.Test == "" {
+				if event.FailedBuild != "" {
+					p.Status = testStatusBuildFail
+				} else {
+					p.Status = testStatusFail
+				}
+			} else {
+				p.TestStatus[event.Test] = testStatusFail
+			}
+		case "skip":
+			if event.Test == "" {
+				p.Status = testStatusNoTests
+			} else {
+				p.TestStatus[event.Test] = testStatusSkip
+			}
+		}
+	}
+
+	return m
+}
+
+// compareTestResults identifies tests that have regressed between commits
+func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
+	before := collectTestStatuses(beforeResults)
+	after := collectTestStatuses(afterResults)
+	var testLevelRegressions []testRegression
+	var packageLevelRegressions []testRegression
+
+	afterPkgs := slices.Sorted(maps.Keys(after))
+	for _, pkg := range afterPkgs {
+		afterResult := after[pkg]
+		afterStatus := afterResult.Status
+		// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
+		beforeResult, ok := before[pkg]
+		beforeStatus := testStatusNoTests
+		if ok {
+			beforeStatus = beforeResult.Status
+		}
+		// If things no longer build, stop at the package level.
+		// Otherwise, proceed to the test level, so we have more precise information.
+		if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
+			packageLevelRegressions = append(packageLevelRegressions, testRegression{
+				Package:      pkg,
+				BeforeStatus: beforeStatus,
+				AfterStatus:  afterStatus,
+			})
+			continue
+		}
+		tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
+		for _, test := range tests {
+			afterStatus := afterResult.TestStatus[test]
+			switch afterStatus {
+			case testStatusPass:
+				continue
+			case testStatusUnknown:
+				slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
+				continue
+			}
+			beforeStatus := beforeResult.TestStatus[test]
+			if isRegression(beforeStatus, afterStatus) {
+				testLevelRegressions = append(testLevelRegressions, testRegression{
+					Package:      pkg,
+					Test:         test,
+					BeforeStatus: beforeStatus,
+					AfterStatus:  afterStatus,
+					Output:       strings.Join(afterResult.TestOutput[test], ""),
+				})
+			}
+		}
+	}
+
+	// If we have test-level regressions, report only those
+	// Otherwise, report package-level regressions
+	var regressions []testRegression
+	if len(testLevelRegressions) > 0 {
+		regressions = testLevelRegressions
+	} else {
+		regressions = packageLevelRegressions
+	}
+
+	// Sort regressions for consistent output
+	slices.SortFunc(regressions, func(a, b testRegression) int {
+		// First by package
+		if c := strings.Compare(a.Package, b.Package); c != 0 {
+			return c
+		}
+		// Then by test name
+		return strings.Compare(a.Test, b.Test)
+	})
+
+	return regressions, nil
+}
+
+// badnessLevels maps test status to a badness level
+// Higher values indicate worse status (more severe issues)
+var badnessLevels = map[testStatus]int{
+	testStatusBuildFail: 5, // Worst
+	testStatusFail:      4,
+	testStatusSkip:      3,
+	testStatusNoTests:   2,
+	testStatusPass:      1,
+	testStatusUnknown:   0, // Least bad - avoids false positives
+}
+
+// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
+type regressionKey struct {
+	before, after testStatus
+}
+
+var regressionMessages = map[regressionKey]string{
+	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
+	{testStatusUnknown, testStatusFail}:      "New test is failing",
+	{testStatusUnknown, testStatusSkip}:      "New test is skipped",
+	{testStatusPass, testStatusBuildFail}:    "Was passing, now has build/vet errors",
+	{testStatusPass, testStatusFail}:         "Was passing, now failing",
+	{testStatusPass, testStatusSkip}:         "Was passing, now skipped",
+	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
+	{testStatusNoTests, testStatusFail}:      "Previously had no tests, now has failing tests",
+	{testStatusNoTests, testStatusSkip}:      "Previously had no tests, now has skipped tests",
+	{testStatusSkip, testStatusBuildFail}:    "Was skipped, now has build/vet errors",
+	{testStatusSkip, testStatusFail}:         "Was skipped, now failing",
+	{testStatusFail, testStatusBuildFail}:    "Was failing, now has build/vet errors",
+}
+
+// isRegression determines if a test has regressed based on before and after status
+// A regression is defined as an increase in badness level
+func isRegression(before, after testStatus) bool {
+	// Higher badness level means worse status
+	return badnessLevels[after] > badnessLevels[before]
+}
+
+// formatTestRegressions generates a human-readable summary of test regressions
+func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
+	if len(regressions) == 0 {
+		return ""
+	}
+
+	buf := new(strings.Builder)
+	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
+
+	for i, reg := range regressions {
+		fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
+		key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
+		message, exists := regressionMessages[key]
+		if !exists {
+			message = "Regression detected"
+		}
+		fmt.Fprintf(buf, "%s\n", message)
+	}
+
+	return buf.String()
+}
+
+// RelatedFile represents a file historically related to the changed files
+type RelatedFile struct {
+	Path        string  // Path to the file
+	Correlation float64 // Correlation score (0.0-1.0)
+}
+
+// findRelatedFiles identifies files that are historically related to the changed files
+// by analyzing git commit history for co-occurrences.
+func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
+	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get commits touching files: %w", err)
+	}
+	if len(commits) == 0 {
+		return nil, nil
+	}
+
+	relChanged := make(map[string]bool, len(changedFiles))
+	for _, file := range changedFiles {
+		rel, err := filepath.Rel(r.repoRoot, file)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
+		}
+		relChanged[rel] = true
+	}
+
+	historyFiles := make(map[string]int)
+	var historyMu sync.Mutex
+
+	maxWorkers := runtime.GOMAXPROCS(0)
+	semaphore := make(chan bool, maxWorkers)
+	var wg sync.WaitGroup
+
+	for _, commit := range commits {
+		wg.Add(1)
+		semaphore <- true // acquire
+
+		go func(commit string) {
+			defer wg.Done()
+			defer func() { <-semaphore }() // release
+			commitFiles, err := r.getFilesInCommit(ctx, commit)
+			if err != nil {
+				slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
+				return
+			}
+			incr := 0
+			for _, file := range commitFiles {
+				if relChanged[file] {
+					incr++
+				}
+			}
+			if incr == 0 {
+				return
+			}
+			historyMu.Lock()
+			defer historyMu.Unlock()
+			for _, file := range commitFiles {
+				historyFiles[file] += incr
+			}
+		}(commit)
+	}
+	wg.Wait()
+
+	// normalize
+	maxCount := 0
+	for _, count := range historyFiles {
+		maxCount = max(maxCount, count)
+	}
+	if maxCount == 0 {
+		return nil, nil
+	}
+
+	var relatedFiles []RelatedFile
+	for file, count := range historyFiles {
+		if relChanged[file] {
+			// Don't include inputs in the output.
+			continue
+		}
+		correlation := float64(count) / float64(maxCount)
+		// Require min correlation to avoid noise
+		if correlation >= 0.1 {
+			relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
+		}
+	}
+
+	// Highest correlation first
+	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
+		return -1 * cmp.Compare(a.Correlation, b.Correlation)
+	})
+
+	// Limit to 1 correlated file per input file.
+	// (Arbitrary limit, to be adjusted.)
+	maxFiles := len(changedFiles)
+	if len(relatedFiles) > maxFiles {
+		relatedFiles = relatedFiles[:maxFiles]
+	}
+
+	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
+	// and then increase the strength of the wording in the relatedFiles message.
+
+	return relatedFiles, nil
+}
+
+// getCommitsTouchingFiles returns all commits that touch any of the specified files
+func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
+	if len(files) == 0 {
+		return nil, nil
+	}
+	fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
+	cmd := exec.CommandContext(ctx, "git", fileArgs...)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
+	}
+	return nonEmptyTrimmedLines(out), nil
+}
+
+// getFilesInCommit returns all files changed in a specific commit
+func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
+	}
+	return nonEmptyTrimmedLines(out), nil
+}
+
+func nonEmptyTrimmedLines(b []byte) []string {
+	var lines []string
+	for line := range strings.Lines(string(b)) {
+		line = strings.TrimSpace(line)
+		if line != "" {
+			lines = append(lines, line)
+		}
+	}
+	return lines
+}
+
+// formatRelatedFiles formats the related files list into a human-readable message
+func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
+	if len(files) == 0 {
+		return ""
+	}
+
+	buf := new(strings.Builder)
+
+	fmt.Fprintf(buf, "Potentially related files:\n\n")
+
+	for _, file := range files {
+		fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
+	}
+
+	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
+	return buf.String()
+}
diff --git a/claudetool/codereview/llm_codereview_prompt.txt b/claudetool/codereview/llm_codereview_prompt.txt
new file mode 100644
index 0000000..920fc9f
--- /dev/null
+++ b/claudetool/codereview/llm_codereview_prompt.txt
@@ -0,0 +1,191 @@
+You are a code review assistant focused on identifying very specific issues.
+You analyze diffs and provide clear identification of problematic patterns.
+
+<issues>
+<issue>
+<name>Historical Comments</name>
+<description>
+Comments that describe past implementations or change history rather than explaining the current code's rationale or behavior. These are often more appropriate for commit messages than inline code comments. This also includes meta-comments that are only relevant to the current code review and wouldn't be useful to future code readers.
+</description>
+
+<identification_criteria>
+- References to past implementations (e.g., "The code used to do X", "Previously, we did Y")
+- Change history information (e.g., "This has changed", "This was modified", "Added support for")
+- Completed TODOs transformed into statements (e.g., "Implemented X", "Fixed Y")
+- Time-bound references (e.g., "As of May 2023", "In the new version")
+- Comments that won't make sense to future readers without knowledge of this specific change
+- Justifications meant only for reviewers (e.g., "Keeping as...", "Left this as...", "Moved this because...")
+- Comments explaining why something wasn't changed or was kept the same
+- Notes about implementation decisions that are only relevant during code review
+- Comments that reference the review process itself
+</identification_criteria>
+
+<exceptions>
+- Comments that explain why a particular approach was chosen over alternatives based on historical lessons
+- Comments that warn against specific approaches due to past failures
+- Comments that provide important context for why something unusual is necessary
+
+These exceptions must be written in a way that remains clear and useful to future readers without knowledge of the specific change history and should focus on the "why" behind decisions, not just documenting that a decision was made.
+</exceptions>
+
+<scope>ONLY newly added or modified comments in the diff. Do not flag existing comments or non-comment code changes.</scope>
+
+<examples>
+<example_input flag="true">
+- TODO: thread context through from caller
++ Threaded context through from caller.
+</example_input>
+<example_output>
+Historical comment: This comment only documents that a change was made, not why it matters. Consider moving to commit message.
+File: filepath/filename.ext
+Line: + Threaded context through from caller.
+</example_output>
+
+<example_input flag="true">
++ // We now use the new API for this operation
+</example_input>
+<example_output>
+Historical comment: References a change without explaining why it matters to current code readers. Consider moving to commit message.
+File: filepath/filename.ext
+Line: + // We now use the new API for this operation
+</example_output>
+
+<example_input flag="true">
++	ReadWriteTx    TransactionType = "rw" // Keeping as string literal as this is a different type
+</example_input>
+<example_output>
+Historical comment: This comment is only relevant to the current code change. Consider moving to commit message or PR description.
+File: tx/tx.go
+Line: +	ReadWriteTx    TransactionType = "rw" // Keeping as string literal as this is a different type
+</example_output>
+
+<example_input flag="false">
++ // It is tempting to short-circuit here.
++ // However, in the past, that has created thundering herds and metastable failures,
++ // so just do all the work every time.
+</example_input>
+<example_output>
+</example_output>
+</examples>
+</issue>
+
+<issue>
+<name>String Iteration Patterns (Go 1.24+)</name>
+<description>
+Newly added code that iterates over lines in a string using traditional patterns like strings.Split or bufio.Scanner when strings.Lines could be used instead. Also covers other string splitting patterns where strings.SplitSeq would be more efficient.
+</description>
+
+<identification_criteria>
+For newline iteration patterns:
+- strings.Split with newline separator followed by iteration
+- bufio.Scanner with ScanLines split (explicit or implicit)
+- Any other pattern that processes lines one by one
+
+For general string splitting patterns:
+- strings.Split with non-newline separators
+</identification_criteria>
+<scope>ONLY newly added code in the diff. Do not flag existing code.</scope>
+<examples>
+<example_input flag="true">
++ lines := strings.Split(s, "\n")
++ for _, line := range lines {
++   // process line
++ }
+</example_input>
+<example_output>
+Line iteration pattern using strings.Split with iteration can be improved.
+
+Use strings.Lines instead:
+```go
+for line := range strings.Lines(s) {
+  // process line
+}
+```
+
+Important: strings.Lines preserves terminating newlines and handles empty strings correctly. The yielded lines include their terminating newlines unless the input doesn't end with one.
+
+File: filepath/filename.ext
+Line: + lines := strings.Split(s, "\n")
+      + for _, line := range lines {
+      +   // process line
+      + }
+</example_output>
+<example_input flag="true">
++ scanner := bufio.NewScanner(strings.NewReader(text))
++ for scanner.Scan() {
++   line := scanner.Text()
++   // process line
++ }
+</example_input>
+<example_output>
+Line iteration using bufio.Scanner with ScanLines can be simplified.
+
+Use strings.Lines instead:
+```go
+for line := range strings.Lines(text) {
+  // process line
+}
+```
+
+Important: strings.Lines preserves terminating newlines and handles empty strings correctly. The yielded lines include their terminating newlines unless the input doesn't end with one.
+
+File: filepath/filename.ext
+Line: + scanner := bufio.NewScanner(strings.NewReader(text))
+      + scanner.Split(bufio.ScanLines)
+      + for scanner.Scan() {
+      +   line := scanner.Text()
+      +   // process line
+      + }
+</example_output>
+<example_input flag="true">
++ parts := strings.Split(data, ",")
++ for _, part := range parts {
++   // process part
++ }
+</example_input>
+<example_output>
+String splitting with iteration can be more efficient.
+
+Use strings.SplitSeq instead:
+```go
+for part := range strings.SplitSeq(data, ",") {
+  // process part
+}
+```
+
+Important: strings.SplitSeq creates an iterator without constructing the full slice, improving memory efficiency for large strings.
+
+File: filepath/filename.ext
+Line: + parts := strings.Split(data, ",")
+      + for _, part := range parts {
+      +   // process part
+      + }
+</example_output>
+<example_input flag="false">
+- lines := strings.Split(s, "\n")
+- for _, line := range lines {
+-   // process line
+- }
+</example_input>
+<example_output>
+</example_output>
+</examples>
+</issue>
+</issues>
+
+Output format:
+For each identified issue:
+1. State the issue type directly
+2. Provide a clear explanation of the problem
+3. Include the file path and line numbers
+4. Quote the exact text if line numbers are unavailable
+5. If completely confident you MAY include a recommended solution with example code
+6. MUST include any important caveats or notes about the replacement
+
+Do not use phrases like "For each occurrence" or other meta descriptions.
+List each instance separately if multiple exist.
+If no issues are found, output only:
+
+```
+No comments.
+```
diff --git a/claudetool/codereview/llm_review.go b/claudetool/codereview/llm_review.go
new file mode 100644
index 0000000..541ee52
--- /dev/null
+++ b/claudetool/codereview/llm_review.go
@@ -0,0 +1,50 @@
+package codereview
+
+import (
+	"context"
+	_ "embed"
+	"fmt"
+	"os/exec"
+	"strings"
+
+	"sketch.dev/llm"
+	"sketch.dev/llm/conversation"
+)
+
+//go:embed llm_codereview_prompt.txt
+var llmCodereviewPrompt string
+
+// doLLMReview analyzes the diff using an LLM subagent.
+func (r *CodeReviewer) doLLMReview(ctx context.Context) (string, error) {
+	// Get the full diff between initial commit and HEAD
+	cmd := exec.CommandContext(ctx, "git", "diff", r.initialCommit, "HEAD")
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to get diff: %w\n%s", err, out)
+	}
+
+	// If no diff, nothing to check
+	if len(out) == 0 {
+		return "", nil
+	}
+
+	info := conversation.ToolCallInfoFromContext(ctx)
+	convo := info.Convo.SubConvo()
+	convo.SystemPrompt = strings.TrimSpace(llmCodereviewPrompt)
+	initialMessage := llm.UserStringMessage("<diff>\n" + string(out) + "\n</diff>")
+
+	resp, err := convo.SendMessage(initialMessage)
+	if err != nil {
+		return "", fmt.Errorf("failed to send LLM codereview message: %w", err)
+	}
+	if len(resp.Content) != 1 {
+		return "", fmt.Errorf("unexpected number of content blocks in LLM codereview response: %d", len(resp.Content))
+	}
+
+	response := resp.Content[0].Text
+	if strings.TrimSpace(response) == "No comments." {
+		return "", nil
+	}
+	return response, nil
+}
diff --git a/claudetool/codereview/testdata/add_skipped_test.txtar b/claudetool/codereview/testdata/add_skipped_test.txtar
new file mode 100644
index 0000000..dba6df2
--- /dev/null
+++ b/claudetool/codereview/testdata/add_skipped_test.txtar
@@ -0,0 +1,29 @@
+Adding a skipped test is a regression
+
+-- p_test.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Add skipped test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestP: New test is skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/basic_gofmt.txtar b/claudetool/codereview/testdata/basic_gofmt.txtar
new file mode 100644
index 0000000..c32ebe9
--- /dev/null
+++ b/claudetool/codereview/testdata/basic_gofmt.txtar
@@ -0,0 +1,13 @@
+Test autoformatting happens
+
+-- .commit --
+Initial commit
+
+-- p.go --
+    package p
+
+-- .commit --
+Add non-gofmt file
+
+-- .run_autoformat --
+/PATH/TO/REPO/p.go
diff --git a/claudetool/codereview/testdata/empty_testdir_add_file.txtar b/claudetool/codereview/testdata/empty_testdir_add_file.txtar
new file mode 100644
index 0000000..6b6a1bb
--- /dev/null
+++ b/claudetool/codereview/testdata/empty_testdir_add_file.txtar
@@ -0,0 +1,39 @@
+Test adding a non-empty file to testdata when test expects empty files
+
+-- p_test.go --
+package p
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestEmptyTestdata(t *testing.T) {
+	files, _ := filepath.Glob("testdata/*")
+	for _, path := range files {
+		data, _ := os.ReadFile(path)
+		if len(data) > 0 {
+			t.Fatalf("testdata file is not empty: %s", path)
+		}
+	}
+}
+
+-- testdata/empty --
+-- .commit --
+Initial commit with empty testdata file
+
+-- testdata/nonempty --
+hi
+-- .commit --
+Add non-empty file to testdata
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestEmptyTestdata: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/failing_to_failing.txtar b/claudetool/codereview/testdata/failing_to_failing.txtar
new file mode 100644
index 0000000..eea1b59
--- /dev/null
+++ b/claudetool/codereview/testdata/failing_to_failing.txtar
@@ -0,0 +1,28 @@
+Going from failing tests to failing tests is not a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Initial commit (with failing test)
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Test still failing
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/fmt_hierarchy.txtar b/claudetool/codereview/testdata/fmt_hierarchy.txtar
new file mode 100644
index 0000000..613d716
--- /dev/null
+++ b/claudetool/codereview/testdata/fmt_hierarchy.txtar
@@ -0,0 +1,58 @@
+Test autoformatting hierarchy
+
+-- bad.go --
+// This file is not properly gofmt'd.
+	package main
+-- gofmt.go --
+// This file is properly gofmt'd.
+package main
+-- goimports.go --
+// This file is properly goimport'd, but not gofumpt'd.
+package main
+
+import (
+	"fmt"
+)
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofumpt'd.
+package main
+-- .commit --
+Initial commit
+
+-- bad.go --
+// This file is still not gofmt'd, but we won't complain, because it was already not gofmt'd.
+	package main
+
+	import "fmt"
+-- gofmt.go --
+// This file is no longer properly gofmt'd.
+	package main
+-- goimports.go --
+// If we remove the imports, it no longer is goimport'd.
+// It should thus be flagged as a formatting issue, despite being gofmt'd.
+package main
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- gofumpt.go --
+// This file is properly gofmt'd, but no longer gofumpt'd.
+package main
+
+func main() {
+
+	fmt.Println("hi")
+}
+-- .commit --
+Mess with formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/gofmt.go
+/PATH/TO/REPO/gofumpt.go
+/PATH/TO/REPO/goimports.go
diff --git a/claudetool/codereview/testdata/gopls_issue_unchanged.txtar b/claudetool/codereview/testdata/gopls_issue_unchanged.txtar
new file mode 100644
index 0000000..d1de43b
--- /dev/null
+++ b/claudetool/codereview/testdata/gopls_issue_unchanged.txtar
@@ -0,0 +1,25 @@
+Pre-existing gopls issues should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+	// unrelated change
+}
+
+-- .commit --
+Make a change but keep the same gopls issue
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/gopls_issues.txtar b/claudetool/codereview/testdata/gopls_issues.txtar
new file mode 100644
index 0000000..c0250ac
--- /dev/null
+++ b/claudetool/codereview/testdata/gopls_issues.txtar
@@ -0,0 +1,27 @@
+Detect newly introduced gopls issues
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+func F() {
+	return
+	panic("unreachable")
+}
+
+-- .commit --
+Add file with gopls issues
+
+-- .run_test --
+# Errors
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:5:2-22: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/gopls_line_number_changed.txtar b/claudetool/codereview/testdata/gopls_line_number_changed.txtar
new file mode 100644
index 0000000..c7eeb37
--- /dev/null
+++ b/claudetool/codereview/testdata/gopls_line_number_changed.txtar
@@ -0,0 +1,26 @@
+Pre-existing gopls issues with line number changes should not be reported as regressions
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+// Change the line number of the unused variable
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Add comments that change line numbers
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/gopls_new_issue_with_existing.txtar b/claudetool/codereview/testdata/gopls_new_issue_with_existing.txtar
new file mode 100644
index 0000000..594ea93
--- /dev/null
+++ b/claudetool/codereview/testdata/gopls_new_issue_with_existing.txtar
@@ -0,0 +1,38 @@
+Only new gopls issues should be reported when existing issues are present
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+-- .commit --
+Initial commit with existing gopls issue
+
+-- p.go --
+package p
+
+func F() {
+	x := 42
+}
+
+func G() {
+	panic("i went down down down")
+	panic("burning ring of fire")
+}
+
+-- .commit --
+Add a new function with a new gopls issue
+
+-- .run_test --
+# Errors
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:9:2-31: unreachable code
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/mark_test_skipped.txtar b/claudetool/codereview/testdata/mark_test_skipped.txtar
new file mode 100644
index 0000000..17f5168
--- /dev/null
+++ b/claudetool/codereview/testdata/mark_test_skipped.txtar
@@ -0,0 +1,34 @@
+Marking a passing test as skipped test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Skip test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now skipped
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/multi_commit_review.txtar b/claudetool/codereview/testdata/multi_commit_review.txtar
new file mode 100644
index 0000000..5c741ce
--- /dev/null
+++ b/claudetool/codereview/testdata/multi_commit_review.txtar
@@ -0,0 +1,53 @@
+Test that all issues across multiple commits are reported
+
+-- a.go --
+package main
+
+-- .commit --
+Initial commit
+
+-- b.go --
+package main
+
+import "sync"
+
+var x sync.Mutex
+var y = x
+
+-- .commit --
+Add another file
+
+-- c_test.go --
+package main
+
+import "testing"
+
+func TestX(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Add a failing test
+
+-- d.go --
+package main
+
+-- .commit --
+Add yet another file
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestX: New test is failing
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/b.go:6:9-10: variable declaration copies lock value to y: sync.Mutex
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/multiple_format_issues.txtar b/claudetool/codereview/testdata/multiple_format_issues.txtar
new file mode 100644
index 0000000..0024a48
--- /dev/null
+++ b/claudetool/codereview/testdata/multiple_format_issues.txtar
@@ -0,0 +1,21 @@
+Test that formatting issues are not reported if pre-existing, per-file
+
+-- main.go --
+	package main
+
+-- .commit --
+Initial commit
+
+-- main.go --
+	package main
+
+	import "fmt"
+
+-- file1.go --
+	package main
+
+-- .commit --
+Add multiple files with formatting issues
+
+-- .run_autoformat --
+/PATH/TO/REPO/file1.go
diff --git a/claudetool/codereview/testdata/new_test_build_error.txtar b/claudetool/codereview/testdata/new_test_build_error.txtar
new file mode 100644
index 0000000..7a9166d
--- /dev/null
+++ b/claudetool/codereview/testdata/new_test_build_error.txtar
@@ -0,0 +1,29 @@
+Adding a new test with build errors is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Add test with build error
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/no_fmt_autogenerated.txtar b/claudetool/codereview/testdata/no_fmt_autogenerated.txtar
new file mode 100644
index 0000000..aab3f57
--- /dev/null
+++ b/claudetool/codereview/testdata/no_fmt_autogenerated.txtar
@@ -0,0 +1,22 @@
+Test that autogenerated files do not get formatted
+
+-- .commit --
+Initial commit
+
+-- generated.go --
+// Code generated DO NOT EDIT.
+
+package main
+
+import (
+_ "fmt"
+)
+
+-- manual.go --
+	package main
+
+-- .commit --
+Add generated and manual files with poor formatting
+
+-- .run_autoformat --
+/PATH/TO/REPO/manual.go
diff --git a/claudetool/codereview/testdata/no_tests.txtar b/claudetool/codereview/testdata/no_tests.txtar
new file mode 100644
index 0000000..ceeb2a5
--- /dev/null
+++ b/claudetool/codereview/testdata/no_tests.txtar
@@ -0,0 +1,19 @@
+Going from no tests to no tests is not a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- other.go --
+package p
+
+func F() {
+}
+
+-- .commit --
+Add func
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/no_tests_to_failing_tests.txtar b/claudetool/codereview/testdata/no_tests_to_failing_tests.txtar
new file mode 100644
index 0000000..b15c179
--- /dev/null
+++ b/claudetool/codereview/testdata/no_tests_to_failing_tests.txtar
@@ -0,0 +1,29 @@
+Adding failing tests is a regression
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit with no tests
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Add failing test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestP: New test is failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/passing_to_build_error.txtar b/claudetool/codereview/testdata/passing_to_build_error.txtar
new file mode 100644
index 0000000..255f26d
--- /dev/null
+++ b/claudetool/codereview/testdata/passing_to_build_error.txtar
@@ -0,0 +1,34 @@
+Going from a passing test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Break test with build error
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/passing_to_failing.txtar b/claudetool/codereview/testdata/passing_to_failing.txtar
new file mode 100644
index 0000000..f96208b
--- /dev/null
+++ b/claudetool/codereview/testdata/passing_to_failing.txtar
@@ -0,0 +1,34 @@
+Going from a passing test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/passing_to_failing_subdir.txtar b/claudetool/codereview/testdata/passing_to_failing_subdir.txtar
new file mode 100644
index 0000000..7aaef4a
--- /dev/null
+++ b/claudetool/codereview/testdata/passing_to_failing_subdir.txtar
@@ -0,0 +1,34 @@
+Going from a passing test to a failing test in a subdir is a regression
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+}
+
+-- .commit --
+Initial commit with passing test
+
+-- foo/p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Break test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev/foo.TestP: Was passing, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/related_files_cooccurrence.txtar b/claudetool/codereview/testdata/related_files_cooccurrence.txtar
new file mode 100644
index 0000000..b15f325
--- /dev/null
+++ b/claudetool/codereview/testdata/related_files_cooccurrence.txtar
@@ -0,0 +1,103 @@
+Tests related files identification based on historical co-occurrence
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- p.go --
+package p
+
+func d() {}
+
+-- .commit --
+Add functions to a.go and b.go
+
+-- a.go --
+package main
+
+func a() {
+    // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+    // Update 1
+}
+
+-- .commit --
+Add functions to a.go and b.go again
+
+-- a.go --
+package main
+
+func a() {
+    // Update 2
+}
+
+-- b.go --
+package main
+
+func b() {
+    // Update 2
+}
+
+-- .commit --
+Add functions to a.go and c.go
+
+-- a.go --
+package main
+
+func a() {
+    // Update 3
+}
+
+-- c.go --
+package main
+
+func c() {
+    // Update 1
+}
+
+-- .commit --
+Update file a.go only
+
+-- a.go --
+package main
+
+func a() {
+    x := 42 // new gopls issue to view mixed info/error lines
+}
+
+-- .run_test --
+# Info
+
+Potentially related files:
+
+- p.go (33%)
+
+These files have historically changed with the files you have modified. Consider whether they require updates as well.
+
+
+# Errors
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/a.go:4:5-6: declared and not used: x
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/skipped_to_build_error.txtar b/claudetool/codereview/testdata/skipped_to_build_error.txtar
new file mode 100644
index 0000000..4bd8642
--- /dev/null
+++ b/claudetool/codereview/testdata/skipped_to_build_error.txtar
@@ -0,0 +1,35 @@
+Going from a skipped test to a test with build errors is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	(
+}
+
+-- .commit --
+Change skipped test to test with build error
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev: Was passing, now has build/vet errors
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/skipped_to_failing.txtar b/claudetool/codereview/testdata/skipped_to_failing.txtar
new file mode 100644
index 0000000..55c12a8
--- /dev/null
+++ b/claudetool/codereview/testdata/skipped_to_failing.txtar
@@ -0,0 +1,35 @@
+Going from a skipped test to a failing test is a regression
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.SkipNow()
+}
+
+-- .commit --
+Initial commit with skipped test
+
+-- p_test.go --
+package p
+
+import "testing"
+
+func TestP(t *testing.T) {
+	t.FailNow()
+}
+
+-- .commit --
+Change skipped test to failing test
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev.TestP: Was skipped, now failing
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/testdata/vet_error_test.txtar b/claudetool/codereview/testdata/vet_error_test.txtar
new file mode 100644
index 0000000..6f2c7c7
--- /dev/null
+++ b/claudetool/codereview/testdata/vet_error_test.txtar
@@ -0,0 +1,38 @@
+Check that vet errors are detected in a test run
+
+-- p.go --
+package p
+
+-- .commit --
+Initial commit
+
+-- p.go --
+package p
+
+import (
+	"fmt"
+)
+
+func F()  {
+	fmt.Printf("Not a string: %s\n", 10)
+}
+
+-- .commit --
+Add vet error
+
+-- .run_test --
+# Errors
+
+Test regressions detected between initial commit (INITIAL_COMMIT_HASH) and HEAD:
+
+1: sketch.dev: Previously had no tests, now has build/vet errors
+
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/p.go:8:2-38: fmt.Printf format %s has arg 10 of wrong type int
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/codereview/teststatus_string.go b/claudetool/codereview/teststatus_string.go
new file mode 100644
index 0000000..6c9ac90
--- /dev/null
+++ b/claudetool/codereview/teststatus_string.go
@@ -0,0 +1,28 @@
+// Code generated by "stringer -type=testStatus -trimprefix=testStatus"; DO NOT EDIT.
+
+package codereview
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[testStatusUnknown-0]
+	_ = x[testStatusPass-1]
+	_ = x[testStatusFail-2]
+	_ = x[testStatusBuildFail-3]
+	_ = x[testStatusSkip-4]
+	_ = x[testStatusNoTests-5]
+}
+
+const _testStatus_name = "UnknownPassFailBuildFailSkipNoTests"
+
+var _testStatus_index = [...]uint8{0, 7, 11, 15, 24, 28, 35}
+
+func (i testStatus) String() string {
+	if i < 0 || i >= testStatus(len(_testStatus_index)-1) {
+		return "testStatus(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _testStatus_name[_testStatus_index[i]:_testStatus_index[i+1]]
+}