blob: c729c3018d63e280af2ff9ad6d00161dc47b94df [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000011 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -070012 "strings"
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000013
14 "sketch.dev/claudetool"
Earl Lee2e463fb2025-04-17 11:22:22 -070015)
16
17// A CodeReviewer manages quality checks.
18type CodeReviewer struct {
19 repoRoot string
Philip Zeyliger49edc922025-05-14 09:45:45 -070020 sketchBaseRef string
Earl Lee2e463fb2025-04-17 11:22:22 -070021 initialStatus []fileStatus // git status of files at initial commit, absolute paths
22 reviewed []string // history of all commits which have been reviewed
23 initialWorktree string // git worktree at initial commit, absolute path
24}
25
Josh Bleecher Snyder9daa5182025-05-16 18:34:00 +000026func NewCodeReviewer(ctx context.Context, repoRoot, sketchBaseRef string) (*CodeReviewer, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -070027 r := &CodeReviewer{
28 repoRoot: repoRoot,
Philip Zeyliger49edc922025-05-14 09:45:45 -070029 sketchBaseRef: sketchBaseRef,
Earl Lee2e463fb2025-04-17 11:22:22 -070030 }
31 if r.repoRoot == "" {
32 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
33 }
Philip Zeyliger49edc922025-05-14 09:45:45 -070034 if r.sketchBaseRef == "" {
35 return nil, fmt.Errorf("NewCodeReviewer: sketchBaseRef must be non-empty")
Earl Lee2e463fb2025-04-17 11:22:22 -070036 }
37 // Confirm that root is in fact the git repo root.
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000038 root, err := claudetool.FindRepoRoot(r.repoRoot)
Earl Lee2e463fb2025-04-17 11:22:22 -070039 if err != nil {
40 return nil, err
41 }
42 if root != r.repoRoot {
43 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
44 }
45
46 // Get an initial list of dirty and untracked files.
47 // We'll filter them out later when deciding whether the worktree is clean.
48 status, err := r.repoStatus(ctx)
49 if err != nil {
50 return nil, err
51 }
52 r.initialStatus = status
53 return r, nil
54}
55
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000056// autoformat formats all files changed in HEAD.
Earl Lee2e463fb2025-04-17 11:22:22 -070057// It returns a list of all files that were formatted.
58// It is best-effort only.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000059func (r *CodeReviewer) autoformat(ctx context.Context) []string {
60 // Refuse to format if initial commit is not an ancestor of HEAD
Philip Zeyliger49edc922025-05-14 09:45:45 -070061 err := r.requireHEADDescendantOfSketchBaseRef(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000062 if err != nil {
63 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format", "err", err)
64 return nil
65 }
66
Earl Lee2e463fb2025-04-17 11:22:22 -070067 head, err := r.CurrentCommit(ctx)
68 if err != nil {
69 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
70 return nil
71 }
72 parent, err := r.ResolveCommit(ctx, "HEAD^1")
73 if err != nil {
74 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
75 return nil
76 }
Earl Lee2e463fb2025-04-17 11:22:22 -070077 // Retrieve a list of all files changed
78 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
Philip Zeyliger49edc922025-05-14 09:45:45 -070079 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, head)
Earl Lee2e463fb2025-04-17 11:22:22 -070080 if err != nil {
81 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
82 return nil
83 }
84
85 // General strategy: For all changed files,
86 // run the strictest formatter that passes on the original version.
87 // TODO: add non-Go formatters?
88 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
89 var fmtFiles []string
90 for _, file := range changedFiles {
91 if !strings.HasSuffix(file, ".go") {
92 continue
93 }
94 fileStatus, err := r.gitFileStatus(ctx, file)
95 if err != nil {
96 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
97 continue
98 }
99 if fileStatus == "D" { // deleted, nothing to format
100 continue
101 }
102 code, err := r.getFileContentAtCommit(ctx, file, head)
103 if err != nil {
104 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
105 continue
106 }
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000107 if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
Earl Lee2e463fb2025-04-17 11:22:22 -0700108 continue
109 }
110 onDisk, err := os.ReadFile(file)
111 if err != nil {
112 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
113 continue
114 }
115 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
116 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
117 continue
118 }
119 var formatterToUse string
120 if fileStatus == "A" {
121 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
122 } else {
123 prev, err := r.getFileContentAtCommit(ctx, file, parent)
124 if err != nil {
125 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
126 continue
127 }
128 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
129 }
130
131 // Apply the chosen formatter to the current file
132 newCode := r.runFormatter(ctx, formatterToUse, code)
133 if newCode == nil { // no changes made
134 continue
135 }
136 // write to disk
137 if err := os.WriteFile(file, newCode, 0o600); err != nil {
138 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
139 continue
140 }
141 fmtFiles = append(fmtFiles, file)
142 }
143 return fmtFiles
144}
145
146// RequireNormalGitState checks that the git repo state is pretty normal.
147func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
148 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
149 for _, dir := range rebaseDirs {
150 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
151 if err == nil {
152 return fmt.Errorf("git repo is not clean: rebase in progress")
153 }
154 }
155 filesReason := map[string]string{
156 "MERGE_HEAD": "merge is in progress",
157 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
158 "REVERT_HEAD": "revert is in progress",
159 "BISECT_LOG": "bisect is in progress",
160 }
161 for file, reason := range filesReason {
162 _, err := os.Stat(filepath.Join(r.repoRoot, file))
163 if err == nil {
164 return fmt.Errorf("git repo is not clean: %s", reason)
165 }
166 }
167 return nil
168}
169
170func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
171 // Check that there are no uncommitted changes, whether staged or not.
172 // (Changes in r.initialStatus are OK, no other changes are.)
173 statuses, err := r.repoStatus(ctx)
174 if err != nil {
175 return fmt.Errorf("unable to get repo status: %w", err)
176 }
177 uncommitted := new(strings.Builder)
178 for _, status := range statuses {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000179 if !statusesContainFile(r.initialStatus, status.Path) {
180 fmt.Fprintf(uncommitted, "%s %s\n", status.RawStatus, status.Path)
Earl Lee2e463fb2025-04-17 11:22:22 -0700181 }
182 }
183 if uncommitted.Len() > 0 {
Josh Bleecher Snyder83b2d352025-05-23 11:39:50 -0700184 return fmt.Errorf("uncommitted changes in repo, please commit relevant changes and revert/delete others:\n%s", uncommitted.String())
Earl Lee2e463fb2025-04-17 11:22:22 -0700185 }
186 return nil
187}
188
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000189func statusesContainFile(statuses []fileStatus, file string) bool {
190 for _, s := range statuses {
Earl Lee2e463fb2025-04-17 11:22:22 -0700191 if s.Path == file {
192 return true
193 }
194 }
195 return false
196}
197
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000198// parseGitStatusLine parses a single line from git status --porcelain output.
199// Returns the file path and status, or empty strings if the line should be ignored.
200func parseGitStatusLine(line string) (path, status string) {
201 if len(line) <= 3 {
202 return "", "" // empty line or invalid format
203 }
204 return strings.TrimSpace(line[3:]), line[:2]
205}
206
Earl Lee2e463fb2025-04-17 11:22:22 -0700207type fileStatus struct {
208 Path string
209 RawStatus string // always 2 characters
210}
211
212func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
213 // Run git status --porcelain, split into lines
214 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
215 cmd.Dir = r.repoRoot
216 out, err := cmd.CombinedOutput()
217 if err != nil {
218 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
219 }
220 var statuses []fileStatus
221 for line := range strings.Lines(string(out)) {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000222 path, status := parseGitStatusLine(line)
223 if path == "" {
224 continue // empty or invalid line
Earl Lee2e463fb2025-04-17 11:22:22 -0700225 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700226 absPath := r.absPath(path)
227 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
228 }
229 return statuses, nil
230}
231
232// CurrentCommit retrieves the current git commit hash
233func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
234 return r.ResolveCommit(ctx, "HEAD")
235}
236
237func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
238 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
239 cmd.Dir = r.repoRoot
240 out, err := cmd.CombinedOutput()
241 if err != nil {
242 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
243 }
244 return strings.TrimSpace(string(out)), nil
245}
246
247func (r *CodeReviewer) absPath(relPath string) string {
248 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
249}
250
251// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
252func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700253 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.sketchBaseRef, "HEAD", "--", file)
Earl Lee2e463fb2025-04-17 11:22:22 -0700254 cmd.Dir = r.repoRoot
255 out, err := cmd.CombinedOutput()
256 if err != nil {
257 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
258 }
259 status := strings.TrimSpace(string(out))
260 if status == "" {
261 return "", fmt.Errorf("no status found for file: %s", file)
262 }
263 return string(status[0]), nil
264}
265
266// getFileContentAtCommit retrieves file content at a specific commit
267func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
268 relFile, err := filepath.Rel(r.repoRoot, file)
269 if err != nil {
270 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
271 file = relFile
272 }
273 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
274 cmd.Dir = r.repoRoot
275 out, err := cmd.CombinedOutput()
276 if err != nil {
277 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
278 }
279 return out, nil
280}
281
282// runFormatter runs the specified formatter on a file and returns the results.
283// A nil result indicates that the file is unchanged, or that an error occurred.
284func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
285 if formatter == "" {
286 return nil // no formatter
287 }
288 // Run the formatter and capture the output
289 cmd := exec.CommandContext(ctx, formatter)
290 cmd.Dir = r.repoRoot
291 cmd.Stdin = bytes.NewReader(content)
292 out, err := cmd.CombinedOutput()
293 if err != nil {
294 // probably a parse error, err on the side of safety
295 return nil
296 }
297 if bytes.Equal(content, out) {
298 return nil // no changes
299 }
300 return out
301}
302
303// formatterWouldChange reports whether a formatter would make changes to the content.
304// If the contents are invalid, it returns false.
305// It works by piping the content to the formatter with the -l flag.
306func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
307 cmd := exec.CommandContext(ctx, formatter, "-l")
308 cmd.Dir = r.repoRoot
309 cmd.Stdin = bytes.NewReader(content)
310 out, err := cmd.CombinedOutput()
311 if err != nil {
312 // probably a parse error, err on the side of safety
313 return false
314 }
315
316 // If the output is empty, the file passes the formatter
317 // If the output contains "<standard input>", the file would be changed
318 return len(bytes.TrimSpace(out)) > 0
319}
320
321// pickFormatter picks a formatter to use for code.
322// If something goes wrong, it recommends no formatter (empty string).
323func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
324 // Test each formatter from strictest to least strict.
325 // Keep the first one that doesn't make changes.
326 formatters := []string{"gofumpt", "goimports", "gofmt"}
327 for _, formatter := range formatters {
328 if r.formatterWouldChange(ctx, formatter, code) {
329 continue
330 }
331 return formatter
332 }
333 return "" // no safe formatter found
334}
335
336// changedFiles retrieves a list of all files changed between two commits
337func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
338 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
339 cmd.Dir = r.repoRoot
340 out, err := cmd.CombinedOutput()
341 if err != nil {
342 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
343 }
344 var files []string
345 for line := range strings.Lines(string(out)) {
346 line = strings.TrimSpace(line)
347 if len(line) == 0 {
348 continue
349 }
350 path := r.absPath(line)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000351 if statusesContainFile(r.initialStatus, path) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700352 continue
353 }
354 files = append(files, path)
355 }
356 return files, nil
357}
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000358
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000359// runGenerate runs go generate on all packages and returns a list of files changed.
360// Errors returned will be reported to the LLM.
361func (r *CodeReviewer) runGenerate(ctx context.Context, packages []string) ([]string, error) {
362 if len(packages) == 0 {
363 return nil, nil
364 }
365
366 args := []string{"generate"}
367 for _, pkg := range packages {
368 // Sigh. Working around test packages is a PITA.
369 if strings.HasSuffix(pkg, ".test") || strings.HasSuffix(pkg, "_test") {
370 continue
371 }
372 args = append(args, pkg)
373 }
374 gen := exec.CommandContext(ctx, "go", args...)
375 gen.Dir = r.repoRoot
376 out, err := gen.CombinedOutput()
377 if err != nil {
378 return nil, fmt.Errorf("$ go %s\n%s", strings.Join(args, " "), out)
379 }
380
381 status := exec.CommandContext(ctx, "git", "status", "--porcelain")
382 status.Dir = r.repoRoot
383 statusOut, err := status.CombinedOutput()
384 if err != nil {
385 return nil, fmt.Errorf("unable to get git status: %w", err)
386 }
387
388 var changed []string
389 for line := range strings.Lines(string(statusOut)) {
390 path, _ := parseGitStatusLine(line)
391 if path == "" {
392 continue
393 }
Josh Bleecher Snyder95632c82025-06-05 01:23:05 +0000394 absPath := filepath.Join(r.repoRoot, path)
395 if statusesContainFile(r.initialStatus, absPath) {
396 continue
397 }
398 changed = append(changed, absPath)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000399 }
400
401 return changed, nil
402}
403
404// isGoRepository checks if the repository contains a go.mod file
405// TODO: check in subdirs?
406func (r *CodeReviewer) isGoRepository() bool {
407 _, err := os.Stat(filepath.Join(r.repoRoot, "go.mod"))
408 return err == nil
409}
410
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000411// ModTidy runs go mod tidy if go module files have changed.
412// Returns a list of files changed by go mod tidy (empty if none).
413func (r *CodeReviewer) ModTidy(ctx context.Context) ([]string, error) {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700414 err := r.requireHEADDescendantOfSketchBaseRef(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000415 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000416 return nil, fmt.Errorf("cannot run ModTidy: %w", err)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000417 }
418
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000419 // Check if any go.mod, go.sum, etc. files have changed
420 currentCommit, err := r.CurrentCommit(ctx)
421 if err != nil {
422 return nil, fmt.Errorf("failed to get current commit: %w", err)
423 }
424
Philip Zeyliger49edc922025-05-14 09:45:45 -0700425 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000426 if err != nil {
427 return nil, fmt.Errorf("failed to get changed files: %w", err)
428 }
429
430 // Check if any of the changed files are go module files
431 goModsChanged := false
432 for _, file := range changedFiles {
433 if isGoModFile(file) {
434 goModsChanged = true
435 break
436 }
437 }
438
439 if !goModsChanged {
440 // No go module files changed, so don't run go mod tidy
441 return nil, nil
442 }
443
444 // Run go mod tidy
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000445 cmd := exec.CommandContext(ctx, "go", "mod", "tidy")
446 cmd.Dir = r.repoRoot
447 out, err := cmd.CombinedOutput()
448 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000449 return nil, fmt.Errorf("go mod tidy failed: %w\n%s", err, out)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000450 }
451
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000452 // Check which files were changed by go mod tidy
453 statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
454 statusCmd.Dir = r.repoRoot
455 statusOut, err := statusCmd.CombinedOutput()
456 if err != nil {
457 return nil, fmt.Errorf("unable to get git status: %w", err)
458 }
459
460 var changedByTidy []string
461
462 for line := range strings.Lines(string(statusOut)) {
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000463 file, _ := parseGitStatusLine(line)
464 if file == "" {
465 continue // empty or invalid line
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000466 }
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000467 if !isGoModFile(file) {
468 continue
469 }
470 path := filepath.Join(r.repoRoot, file)
471 changedByTidy = append(changedByTidy, path)
472 }
473
474 return changedByTidy, nil
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000475}
476
477// RunMechanicalChecks runs all mechanical checks and returns a message describing any changes made.
478func (r *CodeReviewer) RunMechanicalChecks(ctx context.Context) string {
479 var actions []string
480
481 changed := r.autoformat(ctx)
482 if len(changed) > 0 {
483 actions = append(actions, "autoformatters")
484 }
485
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000486 // Run go mod tidy (only if go module files have changed)
487 tidyChanges, err := r.ModTidy(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000488 if err != nil {
489 slog.WarnContext(ctx, "CodeReviewer.RunMechanicalChecks: ModTidy failed", "err", err)
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000490 }
491 if len(tidyChanges) > 0 {
492 changed = append(changed, tidyChanges...)
493 actions = append(actions, "`go mod tidy`")
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000494 }
495
496 if len(changed) == 0 {
497 return ""
498 }
499
500 slices.Sort(changed)
501
502 msg := fmt.Sprintf(`I ran %s, which updated these files:
503
504%s
505
506Please amend your latest git commit with these changes and then continue with what you were doing.`,
507 strings.Join(actions, " and "),
508 strings.Join(changed, "\n"),
509 )
510
511 return msg
512}
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000513
514// isGoModFile returns true if the file is a Go module file (go.mod, go.sum, etc.)
515func isGoModFile(path string) bool {
516 basename := filepath.Base(path)
517 return strings.HasPrefix(basename, "go.")
518}