| package codereview |
| |
| import ( |
| "bytes" |
| "context" |
| "fmt" |
| "log/slog" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "slices" |
| "strings" |
| "sync" |
| |
| "sketch.dev/claudetool" |
| ) |
| |
| // A CodeReviewer manages quality checks. |
| type CodeReviewer struct { |
| repoRoot string |
| sketchBaseRef string |
| initialStatus []fileStatus // git status of files at initial commit, absolute paths |
| reviewed []string // history of all commits which have been reviewed |
| initialWorktree string // git worktree at initial commit, absolute path |
| // "Related files" caching |
| processedChangedFileSets map[string]bool // hash of sorted changedFiles -> processed |
| reportedRelatedFiles map[string]bool // file path -> reported |
| // Pre-warming of Go build/test cache |
| warmMutex sync.Mutex // protects warmedPackages map |
| warmedPackages map[string]bool // packages that have been cache warmed |
| } |
| |
| func NewCodeReviewer(ctx context.Context, repoRoot, sketchBaseRef string) (*CodeReviewer, error) { |
| r := &CodeReviewer{ |
| repoRoot: repoRoot, |
| sketchBaseRef: sketchBaseRef, |
| processedChangedFileSets: make(map[string]bool), |
| reportedRelatedFiles: make(map[string]bool), |
| warmedPackages: make(map[string]bool), |
| } |
| if r.repoRoot == "" { |
| return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty") |
| } |
| if r.sketchBaseRef == "" { |
| return nil, fmt.Errorf("NewCodeReviewer: sketchBaseRef must be non-empty") |
| } |
| // Confirm that root is in fact the git repo root. |
| root, err := claudetool.FindRepoRoot(r.repoRoot) |
| if err != nil { |
| return nil, err |
| } |
| if root != r.repoRoot { |
| return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root) |
| } |
| |
| // Get an initial list of dirty and untracked files. |
| // We'll filter them out later when deciding whether the worktree is clean. |
| status, err := r.repoStatus(ctx) |
| if err != nil { |
| return nil, err |
| } |
| r.initialStatus = status |
| return r, nil |
| } |
| |
| // autoformat formats all files changed in HEAD. |
| // It returns a list of all files that were formatted. |
| // It is best-effort only. |
| func (r *CodeReviewer) autoformat(ctx context.Context) []string { |
| // Refuse to format if initial commit is not an ancestor of HEAD |
| err := r.requireHEADDescendantOfSketchBaseRef(ctx) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format", "err", err) |
| return nil |
| } |
| |
| head, err := r.CurrentCommit(ctx) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err) |
| return nil |
| } |
| parent, err := r.ResolveCommit(ctx, "HEAD^1") |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err) |
| return nil |
| } |
| // Retrieve a list of all files changed |
| // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status. |
| changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, head) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err) |
| return nil |
| } |
| |
| // General strategy: For all changed files, |
| // run the strictest formatter that passes on the original version. |
| // TODO: add non-Go formatters? |
| // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line? |
| var fmtFiles []string |
| for _, file := range changedFiles { |
| if !strings.HasSuffix(file, ".go") { |
| continue |
| } |
| fileStatus, err := r.gitFileStatus(ctx, file) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err) |
| continue |
| } |
| if fileStatus == "D" { // deleted, nothing to format |
| continue |
| } |
| code, err := r.getFileContentAtCommit(ctx, file, head) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err) |
| continue |
| } |
| if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone |
| continue |
| } |
| onDisk, err := os.ReadFile(file) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err) |
| continue |
| } |
| if !bytes.Equal(code, onDisk) { // file has been modified since HEAD |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err) |
| continue |
| } |
| var formatterToUse string |
| if fileStatus == "A" { |
| formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt |
| } else { |
| prev, err := r.getFileContentAtCommit(ctx, file, parent) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err) |
| continue |
| } |
| formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version |
| } |
| |
| // Apply the chosen formatter to the current file |
| newCode := r.runFormatter(ctx, formatterToUse, code) |
| if newCode == nil { // no changes made |
| continue |
| } |
| // write to disk |
| if err := os.WriteFile(file, newCode, 0o600); err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err) |
| continue |
| } |
| fmtFiles = append(fmtFiles, file) |
| } |
| return fmtFiles |
| } |
| |
| // RequireNormalGitState checks that the git repo state is pretty normal. |
| func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error { |
| rebaseDirs := []string{"rebase-merge", "rebase-apply"} |
| for _, dir := range rebaseDirs { |
| _, err := os.Stat(filepath.Join(r.repoRoot, dir)) |
| if err == nil { |
| return fmt.Errorf("git repo is not clean: rebase in progress") |
| } |
| } |
| filesReason := map[string]string{ |
| "MERGE_HEAD": "merge is in progress", |
| "CHERRY_PICK_HEAD": "cherry-pick is in progress", |
| "REVERT_HEAD": "revert is in progress", |
| "BISECT_LOG": "bisect is in progress", |
| } |
| for file, reason := range filesReason { |
| _, err := os.Stat(filepath.Join(r.repoRoot, file)) |
| if err == nil { |
| return fmt.Errorf("git repo is not clean: %s", reason) |
| } |
| } |
| return nil |
| } |
| |
| func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error { |
| // Check that there are no uncommitted changes, whether staged or not. |
| // (Changes in r.initialStatus are OK, no other changes are.) |
| statuses, err := r.repoStatus(ctx) |
| if err != nil { |
| return fmt.Errorf("unable to get repo status: %w", err) |
| } |
| uncommitted := new(strings.Builder) |
| for _, status := range statuses { |
| if !statusesContainFile(r.initialStatus, status.Path) { |
| fmt.Fprintf(uncommitted, "%s %s\n", status.RawStatus, status.Path) |
| } |
| } |
| if uncommitted.Len() > 0 { |
| return fmt.Errorf("uncommitted changes in repo, please commit relevant changes and revert/delete others:\n%s", uncommitted.String()) |
| } |
| return nil |
| } |
| |
| func statusesContainFile(statuses []fileStatus, file string) bool { |
| for _, s := range statuses { |
| if s.Path == file { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // parseGitStatusLine parses a single line from git status --porcelain output. |
| // Returns the file path and status, or empty strings if the line should be ignored. |
| func parseGitStatusLine(line string) (path, status string) { |
| if len(line) <= 3 { |
| return "", "" // empty line or invalid format |
| } |
| return strings.TrimSpace(line[3:]), line[:2] |
| } |
| |
| type fileStatus struct { |
| Path string |
| RawStatus string // always 2 characters |
| } |
| |
| func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) { |
| // Run git status --porcelain, split into lines |
| cmd := exec.CommandContext(ctx, "git", "status", "--porcelain") |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out) |
| } |
| var statuses []fileStatus |
| for line := range strings.Lines(string(out)) { |
| path, status := parseGitStatusLine(line) |
| if path == "" { |
| continue // empty or invalid line |
| } |
| absPath := r.absPath(path) |
| statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status}) |
| } |
| return statuses, nil |
| } |
| |
| // CurrentCommit retrieves the current git commit hash |
| func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) { |
| return r.ResolveCommit(ctx, "HEAD") |
| } |
| |
| func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) { |
| cmd := exec.CommandContext(ctx, "git", "rev-parse", ref) |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out) |
| } |
| return strings.TrimSpace(string(out)), nil |
| } |
| |
| func (r *CodeReviewer) absPath(relPath string) string { |
| if filepath.IsAbs(relPath) { |
| return relPath |
| } |
| return filepath.Clean(filepath.Join(r.repoRoot, relPath)) |
| } |
| |
| // gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.) |
| func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) { |
| cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.sketchBaseRef, "HEAD", "--", file) |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return "", fmt.Errorf("failed to get file status: %w\n%s", err, out) |
| } |
| status := strings.TrimSpace(string(out)) |
| if status == "" { |
| return "", fmt.Errorf("no status found for file: %s", file) |
| } |
| return string(status[0]), nil |
| } |
| |
| // getFileContentAtCommit retrieves file content at a specific commit |
| func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) { |
| relFile, err := filepath.Rel(r.repoRoot, file) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err) |
| file = relFile |
| } |
| cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile)) |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out) |
| } |
| return out, nil |
| } |
| |
| // runFormatter runs the specified formatter on a file and returns the results. |
| // A nil result indicates that the file is unchanged, or that an error occurred. |
| func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte { |
| if formatter == "" { |
| return nil // no formatter |
| } |
| // Run the formatter and capture the output |
| cmd := exec.CommandContext(ctx, formatter) |
| cmd.Dir = r.repoRoot |
| cmd.Stdin = bytes.NewReader(content) |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| // probably a parse error, err on the side of safety |
| return nil |
| } |
| if bytes.Equal(content, out) { |
| return nil // no changes |
| } |
| return out |
| } |
| |
| // formatterWouldChange reports whether a formatter would make changes to the content. |
| // If the contents are invalid, it returns false. |
| // It works by piping the content to the formatter with the -l flag. |
| func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool { |
| cmd := exec.CommandContext(ctx, formatter, "-l") |
| cmd.Dir = r.repoRoot |
| cmd.Stdin = bytes.NewReader(content) |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| // probably a parse error, err on the side of safety |
| return false |
| } |
| |
| // If the output is empty, the file passes the formatter |
| // If the output contains "<standard input>", the file would be changed |
| return len(bytes.TrimSpace(out)) > 0 |
| } |
| |
| // pickFormatter picks a formatter to use for code. |
| // If something goes wrong, it recommends no formatter (empty string). |
| func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string { |
| // Test each formatter from strictest to least strict. |
| // Keep the first one that doesn't make changes. |
| formatters := []string{"gofumpt", "goimports", "gofmt"} |
| for _, formatter := range formatters { |
| if r.formatterWouldChange(ctx, formatter, code) { |
| continue |
| } |
| return formatter |
| } |
| return "" // no safe formatter found |
| } |
| |
| // changedFiles retrieves a list of all files changed between two commits |
| func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) { |
| cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit) |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out) |
| } |
| var files []string |
| for line := range strings.Lines(string(out)) { |
| line = strings.TrimSpace(line) |
| if len(line) == 0 { |
| continue |
| } |
| path := r.absPath(line) |
| if statusesContainFile(r.initialStatus, path) { |
| continue |
| } |
| files = append(files, path) |
| } |
| return files, nil |
| } |
| |
| // runGenerate runs go generate on all packages and returns a list of files changed. |
| // Errors returned will be reported to the LLM. |
| func (r *CodeReviewer) runGenerate(ctx context.Context, packages []string) ([]string, error) { |
| if len(packages) == 0 { |
| return nil, nil |
| } |
| |
| args := []string{"generate"} |
| for _, pkg := range packages { |
| // Sigh. Working around test packages is a PITA. |
| if strings.HasSuffix(pkg, ".test") || strings.HasSuffix(pkg, "_test") { |
| continue |
| } |
| args = append(args, pkg) |
| } |
| gen := exec.CommandContext(ctx, "go", args...) |
| gen.Dir = r.repoRoot |
| out, err := gen.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("$ go %s\n%s", strings.Join(args, " "), out) |
| } |
| |
| status := exec.CommandContext(ctx, "git", "status", "--porcelain") |
| status.Dir = r.repoRoot |
| statusOut, err := status.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("unable to get git status: %w", err) |
| } |
| |
| var changed []string |
| for line := range strings.Lines(string(statusOut)) { |
| path, _ := parseGitStatusLine(line) |
| if path == "" { |
| continue |
| } |
| absPath := filepath.Join(r.repoRoot, path) |
| if statusesContainFile(r.initialStatus, absPath) { |
| continue |
| } |
| changed = append(changed, absPath) |
| } |
| |
| return changed, nil |
| } |
| |
| // isGoRepository checks if the repository contains a go.mod file |
| // TODO: check in subdirs? |
| func (r *CodeReviewer) isGoRepository() bool { |
| _, err := os.Stat(filepath.Join(r.repoRoot, "go.mod")) |
| return err == nil |
| } |
| |
| // ModTidy runs go mod tidy if go module files have changed. |
| // Returns a list of files changed by go mod tidy (empty if none). |
| func (r *CodeReviewer) ModTidy(ctx context.Context) ([]string, error) { |
| err := r.requireHEADDescendantOfSketchBaseRef(ctx) |
| if err != nil { |
| return nil, fmt.Errorf("cannot run ModTidy: %w", err) |
| } |
| |
| // Check if any go.mod, go.sum, etc. files have changed |
| currentCommit, err := r.CurrentCommit(ctx) |
| if err != nil { |
| return nil, fmt.Errorf("failed to get current commit: %w", err) |
| } |
| |
| changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit) |
| if err != nil { |
| return nil, fmt.Errorf("failed to get changed files: %w", err) |
| } |
| |
| // Check if any of the changed files are go module files |
| goModsChanged := false |
| for _, file := range changedFiles { |
| if isGoModFile(file) { |
| goModsChanged = true |
| break |
| } |
| } |
| |
| if !goModsChanged { |
| // No go module files changed, so don't run go mod tidy |
| return nil, nil |
| } |
| |
| // Run go mod tidy |
| cmd := exec.CommandContext(ctx, "go", "mod", "tidy") |
| cmd.Dir = r.repoRoot |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("go mod tidy failed: %w\n%s", err, out) |
| } |
| |
| // Check which files were changed by go mod tidy |
| statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain") |
| statusCmd.Dir = r.repoRoot |
| statusOut, err := statusCmd.CombinedOutput() |
| if err != nil { |
| return nil, fmt.Errorf("unable to get git status: %w", err) |
| } |
| |
| var changedByTidy []string |
| |
| for line := range strings.Lines(string(statusOut)) { |
| file, _ := parseGitStatusLine(line) |
| if file == "" { |
| continue // empty or invalid line |
| } |
| if !isGoModFile(file) { |
| continue |
| } |
| path := filepath.Join(r.repoRoot, file) |
| changedByTidy = append(changedByTidy, path) |
| } |
| |
| return changedByTidy, nil |
| } |
| |
| // RunMechanicalChecks runs all mechanical checks and returns a message describing any changes made. |
| func (r *CodeReviewer) RunMechanicalChecks(ctx context.Context) string { |
| var actions []string |
| |
| changed := r.autoformat(ctx) |
| if len(changed) > 0 { |
| actions = append(actions, "autoformatters") |
| } |
| |
| // Run go mod tidy (only if go module files have changed) |
| tidyChanges, err := r.ModTidy(ctx) |
| if err != nil { |
| slog.WarnContext(ctx, "CodeReviewer.RunMechanicalChecks: ModTidy failed", "err", err) |
| } |
| if len(tidyChanges) > 0 { |
| changed = append(changed, tidyChanges...) |
| actions = append(actions, "`go mod tidy`") |
| } |
| |
| if len(changed) == 0 { |
| return "" |
| } |
| |
| slices.Sort(changed) |
| |
| msg := fmt.Sprintf(`I ran %s, which updated these files: |
| |
| %s |
| |
| Please amend your latest git commit with these changes and then continue with what you were doing.`, |
| strings.Join(actions, " and "), |
| strings.Join(changed, "\n"), |
| ) |
| |
| return msg |
| } |
| |
| // isGoModFile returns true if the file is a Go module file (go.mod, go.sum, etc.) |
| func isGoModFile(path string) bool { |
| basename := filepath.Base(path) |
| return strings.HasPrefix(basename, "go.") |
| } |