claudetool/codereview: new package extracted from claudetool

Co-Authored-By: sketch <hello@sketch.dev>
diff --git a/claudetool/codereview/codereview.go b/claudetool/codereview/codereview.go
new file mode 100644
index 0000000..bf118ee
--- /dev/null
+++ b/claudetool/codereview/codereview.go
@@ -0,0 +1,356 @@
+package codereview
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"sketch.dev/claudetool"
+)
+
+// A CodeReviewer manages quality checks.
+type CodeReviewer struct {
+	repoRoot        string
+	initialCommit   string
+	initialStatus   []fileStatus // git status of files at initial commit, absolute paths
+	reviewed        []string     // history of all commits which have been reviewed
+	initialWorktree string       // git worktree at initial commit, absolute path
+	llmReview       bool         // enables a subagent LLM review
+}
+
+const (
+	NoLLMReview = false
+	DoLLMReview = true
+)
+
+func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string, llmReview bool) (*CodeReviewer, error) {
+	r := &CodeReviewer{
+		repoRoot:      repoRoot,
+		initialCommit: initialCommit,
+		llmReview:     llmReview,
+	}
+	if r.repoRoot == "" {
+		return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
+	}
+	if r.initialCommit == "" {
+		return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
+	}
+	// Confirm that root is in fact the git repo root.
+	root, err := claudetool.FindRepoRoot(r.repoRoot)
+	if err != nil {
+		return nil, err
+	}
+	if root != r.repoRoot {
+		return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
+	}
+
+	// Get an initial list of dirty and untracked files.
+	// We'll filter them out later when deciding whether the worktree is clean.
+	status, err := r.repoStatus(ctx)
+	if err != nil {
+		return nil, err
+	}
+	r.initialStatus = status
+	return r, nil
+}
+
+// Autoformat formats all files changed in HEAD.
+// It returns a list of all files that were formatted.
+// It is best-effort only.
+func (r *CodeReviewer) Autoformat(ctx context.Context) []string {
+	// Refuse to format if HEAD == r.InitialCommit
+	head, err := r.CurrentCommit(ctx)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
+		return nil
+	}
+	parent, err := r.ResolveCommit(ctx, "HEAD^1")
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
+		return nil
+	}
+	if head == r.initialCommit {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format because HEAD == InitialCommit")
+		return nil
+	}
+	// Retrieve a list of all files changed
+	// TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
+	changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
+		return nil
+	}
+
+	// General strategy: For all changed files,
+	// run the strictest formatter that passes on the original version.
+	// TODO: add non-Go formatters?
+	// TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
+	var fmtFiles []string
+	for _, file := range changedFiles {
+		if !strings.HasSuffix(file, ".go") {
+			continue
+		}
+		fileStatus, err := r.gitFileStatus(ctx, file)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
+			continue
+		}
+		if fileStatus == "D" { // deleted, nothing to format
+			continue
+		}
+		code, err := r.getFileContentAtCommit(ctx, file, head)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
+			continue
+		}
+		if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
+			continue
+		}
+		onDisk, err := os.ReadFile(file)
+		if err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
+			continue
+		}
+		if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
+			continue
+		}
+		var formatterToUse string
+		if fileStatus == "A" {
+			formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
+		} else {
+			prev, err := r.getFileContentAtCommit(ctx, file, parent)
+			if err != nil {
+				slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
+				continue
+			}
+			formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
+		}
+
+		// Apply the chosen formatter to the current file
+		newCode := r.runFormatter(ctx, formatterToUse, code)
+		if newCode == nil { // no changes made
+			continue
+		}
+		// write to disk
+		if err := os.WriteFile(file, newCode, 0o600); err != nil {
+			slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
+			continue
+		}
+		fmtFiles = append(fmtFiles, file)
+	}
+	return fmtFiles
+}
+
+// RequireNormalGitState checks that the git repo state is pretty normal.
+func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
+	rebaseDirs := []string{"rebase-merge", "rebase-apply"}
+	for _, dir := range rebaseDirs {
+		_, err := os.Stat(filepath.Join(r.repoRoot, dir))
+		if err == nil {
+			return fmt.Errorf("git repo is not clean: rebase in progress")
+		}
+	}
+	filesReason := map[string]string{
+		"MERGE_HEAD":       "merge is in progress",
+		"CHERRY_PICK_HEAD": "cherry-pick is in progress",
+		"REVERT_HEAD":      "revert is in progress",
+		"BISECT_LOG":       "bisect is in progress",
+	}
+	for file, reason := range filesReason {
+		_, err := os.Stat(filepath.Join(r.repoRoot, file))
+		if err == nil {
+			return fmt.Errorf("git repo is not clean: %s", reason)
+		}
+	}
+	return nil
+}
+
+func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
+	// Check that there are no uncommitted changes, whether staged or not.
+	// (Changes in r.initialStatus are OK, no other changes are.)
+	statuses, err := r.repoStatus(ctx)
+	if err != nil {
+		return fmt.Errorf("unable to get repo status: %w", err)
+	}
+	uncommitted := new(strings.Builder)
+	for _, status := range statuses {
+		if !r.initialStatusesContainFile(status.Path) {
+			fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
+		}
+	}
+	if uncommitted.Len() > 0 {
+		return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
+	}
+	return nil
+}
+
+func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
+	for _, s := range r.initialStatus {
+		if s.Path == file {
+			return true
+		}
+	}
+	return false
+}
+
+type fileStatus struct {
+	Path      string
+	RawStatus string // always 2 characters
+}
+
+func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
+	// Run git status --porcelain, split into lines
+	cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
+	}
+	var statuses []fileStatus
+	for line := range strings.Lines(string(out)) {
+		if len(line) == 0 {
+			continue
+		}
+		if len(line) < 3 {
+			return nil, fmt.Errorf("invalid status line: %s", line)
+		}
+		path := line[3:]
+		status := line[:2]
+		absPath := r.absPath(path)
+		statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
+	}
+	return statuses, nil
+}
+
+// CurrentCommit retrieves the current git commit hash
+func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
+	return r.ResolveCommit(ctx, "HEAD")
+}
+
+func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
+	}
+	return strings.TrimSpace(string(out)), nil
+}
+
+func (r *CodeReviewer) absPath(relPath string) string {
+	return filepath.Clean(filepath.Join(r.repoRoot, relPath))
+}
+
+// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
+func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
+	}
+	status := strings.TrimSpace(string(out))
+	if status == "" {
+		return "", fmt.Errorf("no status found for file: %s", file)
+	}
+	return string(status[0]), nil
+}
+
+// getFileContentAtCommit retrieves file content at a specific commit
+func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
+	relFile, err := filepath.Rel(r.repoRoot, file)
+	if err != nil {
+		slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
+		file = relFile
+	}
+	cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
+	}
+	return out, nil
+}
+
+// runFormatter runs the specified formatter on a file and returns the results.
+// A nil result indicates that the file is unchanged, or that an error occurred.
+func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
+	if formatter == "" {
+		return nil // no formatter
+	}
+	// Run the formatter and capture the output
+	cmd := exec.CommandContext(ctx, formatter)
+	cmd.Dir = r.repoRoot
+	cmd.Stdin = bytes.NewReader(content)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		// probably a parse error, err on the side of safety
+		return nil
+	}
+	if bytes.Equal(content, out) {
+		return nil // no changes
+	}
+	return out
+}
+
+// formatterWouldChange reports whether a formatter would make changes to the content.
+// If the contents are invalid, it returns false.
+// It works by piping the content to the formatter with the -l flag.
+func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
+	cmd := exec.CommandContext(ctx, formatter, "-l")
+	cmd.Dir = r.repoRoot
+	cmd.Stdin = bytes.NewReader(content)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		// probably a parse error, err on the side of safety
+		return false
+	}
+
+	// If the output is empty, the file passes the formatter
+	// If the output contains "<standard input>", the file would be changed
+	return len(bytes.TrimSpace(out)) > 0
+}
+
+// pickFormatter picks a formatter to use for code.
+// If something goes wrong, it recommends no formatter (empty string).
+func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
+	// Test each formatter from strictest to least strict.
+	// Keep the first one that doesn't make changes.
+	formatters := []string{"gofumpt", "goimports", "gofmt"}
+	for _, formatter := range formatters {
+		if r.formatterWouldChange(ctx, formatter, code) {
+			continue
+		}
+		return formatter
+	}
+	return "" // no safe formatter found
+}
+
+// changedFiles retrieves a list of all files changed between two commits
+func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
+	}
+	var files []string
+	for line := range strings.Lines(string(out)) {
+		line = strings.TrimSpace(line)
+		if len(line) == 0 {
+			continue
+		}
+		path := r.absPath(line)
+		if r.initialStatusesContainFile(path) {
+			continue
+		}
+		files = append(files, path)
+	}
+	return files, nil
+}