blob: d99edffd4ed5f5efe6b7729f0738224764c5862b [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000011 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -070012 "strings"
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000013
14 "sketch.dev/claudetool"
Earl Lee2e463fb2025-04-17 11:22:22 -070015)
16
17// A CodeReviewer manages quality checks.
18type CodeReviewer struct {
19 repoRoot string
Philip Zeyliger49edc922025-05-14 09:45:45 -070020 sketchBaseRef string
Earl Lee2e463fb2025-04-17 11:22:22 -070021 initialStatus []fileStatus // git status of files at initial commit, absolute paths
22 reviewed []string // history of all commits which have been reviewed
23 initialWorktree string // git worktree at initial commit, absolute path
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +000024 // "Related files" caching
25 processedChangedFileSets map[string]bool // hash of sorted changedFiles -> processed
26 reportedRelatedFiles map[string]bool // file path -> reported
Earl Lee2e463fb2025-04-17 11:22:22 -070027}
28
Josh Bleecher Snyder9daa5182025-05-16 18:34:00 +000029func NewCodeReviewer(ctx context.Context, repoRoot, sketchBaseRef string) (*CodeReviewer, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -070030 r := &CodeReviewer{
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +000031 repoRoot: repoRoot,
32 sketchBaseRef: sketchBaseRef,
33 processedChangedFileSets: make(map[string]bool),
34 reportedRelatedFiles: make(map[string]bool),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 }
36 if r.repoRoot == "" {
37 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
38 }
Philip Zeyliger49edc922025-05-14 09:45:45 -070039 if r.sketchBaseRef == "" {
40 return nil, fmt.Errorf("NewCodeReviewer: sketchBaseRef must be non-empty")
Earl Lee2e463fb2025-04-17 11:22:22 -070041 }
42 // Confirm that root is in fact the git repo root.
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000043 root, err := claudetool.FindRepoRoot(r.repoRoot)
Earl Lee2e463fb2025-04-17 11:22:22 -070044 if err != nil {
45 return nil, err
46 }
47 if root != r.repoRoot {
48 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
49 }
50
51 // Get an initial list of dirty and untracked files.
52 // We'll filter them out later when deciding whether the worktree is clean.
53 status, err := r.repoStatus(ctx)
54 if err != nil {
55 return nil, err
56 }
57 r.initialStatus = status
58 return r, nil
59}
60
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000061// autoformat formats all files changed in HEAD.
Earl Lee2e463fb2025-04-17 11:22:22 -070062// It returns a list of all files that were formatted.
63// It is best-effort only.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000064func (r *CodeReviewer) autoformat(ctx context.Context) []string {
65 // Refuse to format if initial commit is not an ancestor of HEAD
Philip Zeyliger49edc922025-05-14 09:45:45 -070066 err := r.requireHEADDescendantOfSketchBaseRef(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000067 if err != nil {
68 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format", "err", err)
69 return nil
70 }
71
Earl Lee2e463fb2025-04-17 11:22:22 -070072 head, err := r.CurrentCommit(ctx)
73 if err != nil {
74 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
75 return nil
76 }
77 parent, err := r.ResolveCommit(ctx, "HEAD^1")
78 if err != nil {
79 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
80 return nil
81 }
Earl Lee2e463fb2025-04-17 11:22:22 -070082 // Retrieve a list of all files changed
83 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
Philip Zeyliger49edc922025-05-14 09:45:45 -070084 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, head)
Earl Lee2e463fb2025-04-17 11:22:22 -070085 if err != nil {
86 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
87 return nil
88 }
89
90 // General strategy: For all changed files,
91 // run the strictest formatter that passes on the original version.
92 // TODO: add non-Go formatters?
93 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
94 var fmtFiles []string
95 for _, file := range changedFiles {
96 if !strings.HasSuffix(file, ".go") {
97 continue
98 }
99 fileStatus, err := r.gitFileStatus(ctx, file)
100 if err != nil {
101 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
102 continue
103 }
104 if fileStatus == "D" { // deleted, nothing to format
105 continue
106 }
107 code, err := r.getFileContentAtCommit(ctx, file, head)
108 if err != nil {
109 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
110 continue
111 }
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000112 if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
Earl Lee2e463fb2025-04-17 11:22:22 -0700113 continue
114 }
115 onDisk, err := os.ReadFile(file)
116 if err != nil {
117 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
118 continue
119 }
120 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
121 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
122 continue
123 }
124 var formatterToUse string
125 if fileStatus == "A" {
126 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
127 } else {
128 prev, err := r.getFileContentAtCommit(ctx, file, parent)
129 if err != nil {
130 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
131 continue
132 }
133 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
134 }
135
136 // Apply the chosen formatter to the current file
137 newCode := r.runFormatter(ctx, formatterToUse, code)
138 if newCode == nil { // no changes made
139 continue
140 }
141 // write to disk
142 if err := os.WriteFile(file, newCode, 0o600); err != nil {
143 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
144 continue
145 }
146 fmtFiles = append(fmtFiles, file)
147 }
148 return fmtFiles
149}
150
151// RequireNormalGitState checks that the git repo state is pretty normal.
152func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
153 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
154 for _, dir := range rebaseDirs {
155 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
156 if err == nil {
157 return fmt.Errorf("git repo is not clean: rebase in progress")
158 }
159 }
160 filesReason := map[string]string{
161 "MERGE_HEAD": "merge is in progress",
162 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
163 "REVERT_HEAD": "revert is in progress",
164 "BISECT_LOG": "bisect is in progress",
165 }
166 for file, reason := range filesReason {
167 _, err := os.Stat(filepath.Join(r.repoRoot, file))
168 if err == nil {
169 return fmt.Errorf("git repo is not clean: %s", reason)
170 }
171 }
172 return nil
173}
174
175func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
176 // Check that there are no uncommitted changes, whether staged or not.
177 // (Changes in r.initialStatus are OK, no other changes are.)
178 statuses, err := r.repoStatus(ctx)
179 if err != nil {
180 return fmt.Errorf("unable to get repo status: %w", err)
181 }
182 uncommitted := new(strings.Builder)
183 for _, status := range statuses {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000184 if !statusesContainFile(r.initialStatus, status.Path) {
185 fmt.Fprintf(uncommitted, "%s %s\n", status.RawStatus, status.Path)
Earl Lee2e463fb2025-04-17 11:22:22 -0700186 }
187 }
188 if uncommitted.Len() > 0 {
Josh Bleecher Snyder83b2d352025-05-23 11:39:50 -0700189 return fmt.Errorf("uncommitted changes in repo, please commit relevant changes and revert/delete others:\n%s", uncommitted.String())
Earl Lee2e463fb2025-04-17 11:22:22 -0700190 }
191 return nil
192}
193
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000194func statusesContainFile(statuses []fileStatus, file string) bool {
195 for _, s := range statuses {
Earl Lee2e463fb2025-04-17 11:22:22 -0700196 if s.Path == file {
197 return true
198 }
199 }
200 return false
201}
202
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000203// parseGitStatusLine parses a single line from git status --porcelain output.
204// Returns the file path and status, or empty strings if the line should be ignored.
205func parseGitStatusLine(line string) (path, status string) {
206 if len(line) <= 3 {
207 return "", "" // empty line or invalid format
208 }
209 return strings.TrimSpace(line[3:]), line[:2]
210}
211
Earl Lee2e463fb2025-04-17 11:22:22 -0700212type fileStatus struct {
213 Path string
214 RawStatus string // always 2 characters
215}
216
217func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
218 // Run git status --porcelain, split into lines
219 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
220 cmd.Dir = r.repoRoot
221 out, err := cmd.CombinedOutput()
222 if err != nil {
223 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
224 }
225 var statuses []fileStatus
226 for line := range strings.Lines(string(out)) {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000227 path, status := parseGitStatusLine(line)
228 if path == "" {
229 continue // empty or invalid line
Earl Lee2e463fb2025-04-17 11:22:22 -0700230 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700231 absPath := r.absPath(path)
232 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
233 }
234 return statuses, nil
235}
236
237// CurrentCommit retrieves the current git commit hash
238func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
239 return r.ResolveCommit(ctx, "HEAD")
240}
241
242func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
243 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
244 cmd.Dir = r.repoRoot
245 out, err := cmd.CombinedOutput()
246 if err != nil {
247 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
248 }
249 return strings.TrimSpace(string(out)), nil
250}
251
252func (r *CodeReviewer) absPath(relPath string) string {
253 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
254}
255
256// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
257func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700258 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.sketchBaseRef, "HEAD", "--", file)
Earl Lee2e463fb2025-04-17 11:22:22 -0700259 cmd.Dir = r.repoRoot
260 out, err := cmd.CombinedOutput()
261 if err != nil {
262 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
263 }
264 status := strings.TrimSpace(string(out))
265 if status == "" {
266 return "", fmt.Errorf("no status found for file: %s", file)
267 }
268 return string(status[0]), nil
269}
270
271// getFileContentAtCommit retrieves file content at a specific commit
272func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
273 relFile, err := filepath.Rel(r.repoRoot, file)
274 if err != nil {
275 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
276 file = relFile
277 }
278 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
279 cmd.Dir = r.repoRoot
280 out, err := cmd.CombinedOutput()
281 if err != nil {
282 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
283 }
284 return out, nil
285}
286
287// runFormatter runs the specified formatter on a file and returns the results.
288// A nil result indicates that the file is unchanged, or that an error occurred.
289func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
290 if formatter == "" {
291 return nil // no formatter
292 }
293 // Run the formatter and capture the output
294 cmd := exec.CommandContext(ctx, formatter)
295 cmd.Dir = r.repoRoot
296 cmd.Stdin = bytes.NewReader(content)
297 out, err := cmd.CombinedOutput()
298 if err != nil {
299 // probably a parse error, err on the side of safety
300 return nil
301 }
302 if bytes.Equal(content, out) {
303 return nil // no changes
304 }
305 return out
306}
307
308// formatterWouldChange reports whether a formatter would make changes to the content.
309// If the contents are invalid, it returns false.
310// It works by piping the content to the formatter with the -l flag.
311func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
312 cmd := exec.CommandContext(ctx, formatter, "-l")
313 cmd.Dir = r.repoRoot
314 cmd.Stdin = bytes.NewReader(content)
315 out, err := cmd.CombinedOutput()
316 if err != nil {
317 // probably a parse error, err on the side of safety
318 return false
319 }
320
321 // If the output is empty, the file passes the formatter
322 // If the output contains "<standard input>", the file would be changed
323 return len(bytes.TrimSpace(out)) > 0
324}
325
326// pickFormatter picks a formatter to use for code.
327// If something goes wrong, it recommends no formatter (empty string).
328func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
329 // Test each formatter from strictest to least strict.
330 // Keep the first one that doesn't make changes.
331 formatters := []string{"gofumpt", "goimports", "gofmt"}
332 for _, formatter := range formatters {
333 if r.formatterWouldChange(ctx, formatter, code) {
334 continue
335 }
336 return formatter
337 }
338 return "" // no safe formatter found
339}
340
341// changedFiles retrieves a list of all files changed between two commits
342func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
343 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
344 cmd.Dir = r.repoRoot
345 out, err := cmd.CombinedOutput()
346 if err != nil {
347 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
348 }
349 var files []string
350 for line := range strings.Lines(string(out)) {
351 line = strings.TrimSpace(line)
352 if len(line) == 0 {
353 continue
354 }
355 path := r.absPath(line)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000356 if statusesContainFile(r.initialStatus, path) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700357 continue
358 }
359 files = append(files, path)
360 }
361 return files, nil
362}
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000363
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000364// runGenerate runs go generate on all packages and returns a list of files changed.
365// Errors returned will be reported to the LLM.
366func (r *CodeReviewer) runGenerate(ctx context.Context, packages []string) ([]string, error) {
367 if len(packages) == 0 {
368 return nil, nil
369 }
370
371 args := []string{"generate"}
372 for _, pkg := range packages {
373 // Sigh. Working around test packages is a PITA.
374 if strings.HasSuffix(pkg, ".test") || strings.HasSuffix(pkg, "_test") {
375 continue
376 }
377 args = append(args, pkg)
378 }
379 gen := exec.CommandContext(ctx, "go", args...)
380 gen.Dir = r.repoRoot
381 out, err := gen.CombinedOutput()
382 if err != nil {
383 return nil, fmt.Errorf("$ go %s\n%s", strings.Join(args, " "), out)
384 }
385
386 status := exec.CommandContext(ctx, "git", "status", "--porcelain")
387 status.Dir = r.repoRoot
388 statusOut, err := status.CombinedOutput()
389 if err != nil {
390 return nil, fmt.Errorf("unable to get git status: %w", err)
391 }
392
393 var changed []string
394 for line := range strings.Lines(string(statusOut)) {
395 path, _ := parseGitStatusLine(line)
396 if path == "" {
397 continue
398 }
Josh Bleecher Snyder95632c82025-06-05 01:23:05 +0000399 absPath := filepath.Join(r.repoRoot, path)
400 if statusesContainFile(r.initialStatus, absPath) {
401 continue
402 }
403 changed = append(changed, absPath)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000404 }
405
406 return changed, nil
407}
408
409// isGoRepository checks if the repository contains a go.mod file
410// TODO: check in subdirs?
411func (r *CodeReviewer) isGoRepository() bool {
412 _, err := os.Stat(filepath.Join(r.repoRoot, "go.mod"))
413 return err == nil
414}
415
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000416// ModTidy runs go mod tidy if go module files have changed.
417// Returns a list of files changed by go mod tidy (empty if none).
418func (r *CodeReviewer) ModTidy(ctx context.Context) ([]string, error) {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700419 err := r.requireHEADDescendantOfSketchBaseRef(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000420 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000421 return nil, fmt.Errorf("cannot run ModTidy: %w", err)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000422 }
423
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000424 // Check if any go.mod, go.sum, etc. files have changed
425 currentCommit, err := r.CurrentCommit(ctx)
426 if err != nil {
427 return nil, fmt.Errorf("failed to get current commit: %w", err)
428 }
429
Philip Zeyliger49edc922025-05-14 09:45:45 -0700430 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000431 if err != nil {
432 return nil, fmt.Errorf("failed to get changed files: %w", err)
433 }
434
435 // Check if any of the changed files are go module files
436 goModsChanged := false
437 for _, file := range changedFiles {
438 if isGoModFile(file) {
439 goModsChanged = true
440 break
441 }
442 }
443
444 if !goModsChanged {
445 // No go module files changed, so don't run go mod tidy
446 return nil, nil
447 }
448
449 // Run go mod tidy
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000450 cmd := exec.CommandContext(ctx, "go", "mod", "tidy")
451 cmd.Dir = r.repoRoot
452 out, err := cmd.CombinedOutput()
453 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000454 return nil, fmt.Errorf("go mod tidy failed: %w\n%s", err, out)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000455 }
456
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000457 // Check which files were changed by go mod tidy
458 statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
459 statusCmd.Dir = r.repoRoot
460 statusOut, err := statusCmd.CombinedOutput()
461 if err != nil {
462 return nil, fmt.Errorf("unable to get git status: %w", err)
463 }
464
465 var changedByTidy []string
466
467 for line := range strings.Lines(string(statusOut)) {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000468 file, _ := parseGitStatusLine(line)
469 if file == "" {
470 continue // empty or invalid line
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000471 }
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000472 if !isGoModFile(file) {
473 continue
474 }
475 path := filepath.Join(r.repoRoot, file)
476 changedByTidy = append(changedByTidy, path)
477 }
478
479 return changedByTidy, nil
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000480}
481
482// RunMechanicalChecks runs all mechanical checks and returns a message describing any changes made.
483func (r *CodeReviewer) RunMechanicalChecks(ctx context.Context) string {
484 var actions []string
485
486 changed := r.autoformat(ctx)
487 if len(changed) > 0 {
488 actions = append(actions, "autoformatters")
489 }
490
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000491 // Run go mod tidy (only if go module files have changed)
492 tidyChanges, err := r.ModTidy(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000493 if err != nil {
494 slog.WarnContext(ctx, "CodeReviewer.RunMechanicalChecks: ModTidy failed", "err", err)
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000495 }
496 if len(tidyChanges) > 0 {
497 changed = append(changed, tidyChanges...)
498 actions = append(actions, "`go mod tidy`")
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000499 }
500
501 if len(changed) == 0 {
502 return ""
503 }
504
505 slices.Sort(changed)
506
507 msg := fmt.Sprintf(`I ran %s, which updated these files:
508
509%s
510
511Please amend your latest git commit with these changes and then continue with what you were doing.`,
512 strings.Join(actions, " and "),
513 strings.Join(changed, "\n"),
514 )
515
516 return msg
517}
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000518
519// isGoModFile returns true if the file is a Go module file (go.mod, go.sum, etc.)
520func isGoModFile(path string) bool {
521 basename := filepath.Base(path)
522 return strings.HasPrefix(basename, "go.")
523}