blob: 8051bf78d95df0f1d9df48e51c8513528fe821e8 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000011 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -070012 "strings"
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000013
14 "sketch.dev/claudetool"
Earl Lee2e463fb2025-04-17 11:22:22 -070015)
16
17// A CodeReviewer manages quality checks.
18type CodeReviewer struct {
19 repoRoot string
20 initialCommit string
21 initialStatus []fileStatus // git status of files at initial commit, absolute paths
22 reviewed []string // history of all commits which have been reviewed
23 initialWorktree string // git worktree at initial commit, absolute path
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070024 llmReview bool // enables a subagent LLM review
Earl Lee2e463fb2025-04-17 11:22:22 -070025}
26
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070027const (
28 NoLLMReview = false
29 DoLLMReview = true
30)
31
32func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string, llmReview bool) (*CodeReviewer, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -070033 r := &CodeReviewer{
34 repoRoot: repoRoot,
35 initialCommit: initialCommit,
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070036 llmReview: llmReview,
Earl Lee2e463fb2025-04-17 11:22:22 -070037 }
38 if r.repoRoot == "" {
39 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
40 }
41 if r.initialCommit == "" {
42 return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
43 }
44 // Confirm that root is in fact the git repo root.
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000045 root, err := claudetool.FindRepoRoot(r.repoRoot)
Earl Lee2e463fb2025-04-17 11:22:22 -070046 if err != nil {
47 return nil, err
48 }
49 if root != r.repoRoot {
50 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
51 }
52
53 // Get an initial list of dirty and untracked files.
54 // We'll filter them out later when deciding whether the worktree is clean.
55 status, err := r.repoStatus(ctx)
56 if err != nil {
57 return nil, err
58 }
59 r.initialStatus = status
60 return r, nil
61}
62
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000063// autoformat formats all files changed in HEAD.
Earl Lee2e463fb2025-04-17 11:22:22 -070064// It returns a list of all files that were formatted.
65// It is best-effort only.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +000066func (r *CodeReviewer) autoformat(ctx context.Context) []string {
67 // Refuse to format if initial commit is not an ancestor of HEAD
68 err := r.requireHEADDescendantOfInitialCommit(ctx)
69 if err != nil {
70 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format", "err", err)
71 return nil
72 }
73
Earl Lee2e463fb2025-04-17 11:22:22 -070074 head, err := r.CurrentCommit(ctx)
75 if err != nil {
76 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
77 return nil
78 }
79 parent, err := r.ResolveCommit(ctx, "HEAD^1")
80 if err != nil {
81 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
82 return nil
83 }
Earl Lee2e463fb2025-04-17 11:22:22 -070084 // Retrieve a list of all files changed
85 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
86 changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
87 if err != nil {
88 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
89 return nil
90 }
91
92 // General strategy: For all changed files,
93 // run the strictest formatter that passes on the original version.
94 // TODO: add non-Go formatters?
95 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
96 var fmtFiles []string
97 for _, file := range changedFiles {
98 if !strings.HasSuffix(file, ".go") {
99 continue
100 }
101 fileStatus, err := r.gitFileStatus(ctx, file)
102 if err != nil {
103 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
104 continue
105 }
106 if fileStatus == "D" { // deleted, nothing to format
107 continue
108 }
109 code, err := r.getFileContentAtCommit(ctx, file, head)
110 if err != nil {
111 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
112 continue
113 }
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000114 if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
Earl Lee2e463fb2025-04-17 11:22:22 -0700115 continue
116 }
117 onDisk, err := os.ReadFile(file)
118 if err != nil {
119 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
120 continue
121 }
122 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
123 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
124 continue
125 }
126 var formatterToUse string
127 if fileStatus == "A" {
128 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
129 } else {
130 prev, err := r.getFileContentAtCommit(ctx, file, parent)
131 if err != nil {
132 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
133 continue
134 }
135 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
136 }
137
138 // Apply the chosen formatter to the current file
139 newCode := r.runFormatter(ctx, formatterToUse, code)
140 if newCode == nil { // no changes made
141 continue
142 }
143 // write to disk
144 if err := os.WriteFile(file, newCode, 0o600); err != nil {
145 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
146 continue
147 }
148 fmtFiles = append(fmtFiles, file)
149 }
150 return fmtFiles
151}
152
153// RequireNormalGitState checks that the git repo state is pretty normal.
154func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
155 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
156 for _, dir := range rebaseDirs {
157 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
158 if err == nil {
159 return fmt.Errorf("git repo is not clean: rebase in progress")
160 }
161 }
162 filesReason := map[string]string{
163 "MERGE_HEAD": "merge is in progress",
164 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
165 "REVERT_HEAD": "revert is in progress",
166 "BISECT_LOG": "bisect is in progress",
167 }
168 for file, reason := range filesReason {
169 _, err := os.Stat(filepath.Join(r.repoRoot, file))
170 if err == nil {
171 return fmt.Errorf("git repo is not clean: %s", reason)
172 }
173 }
174 return nil
175}
176
177func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
178 // Check that there are no uncommitted changes, whether staged or not.
179 // (Changes in r.initialStatus are OK, no other changes are.)
180 statuses, err := r.repoStatus(ctx)
181 if err != nil {
182 return fmt.Errorf("unable to get repo status: %w", err)
183 }
184 uncommitted := new(strings.Builder)
185 for _, status := range statuses {
186 if !r.initialStatusesContainFile(status.Path) {
187 fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
188 }
189 }
190 if uncommitted.Len() > 0 {
191 return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
192 }
193 return nil
194}
195
196func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
197 for _, s := range r.initialStatus {
198 if s.Path == file {
199 return true
200 }
201 }
202 return false
203}
204
205type fileStatus struct {
206 Path string
207 RawStatus string // always 2 characters
208}
209
210func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
211 // Run git status --porcelain, split into lines
212 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
213 cmd.Dir = r.repoRoot
214 out, err := cmd.CombinedOutput()
215 if err != nil {
216 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
217 }
218 var statuses []fileStatus
219 for line := range strings.Lines(string(out)) {
220 if len(line) == 0 {
221 continue
222 }
223 if len(line) < 3 {
224 return nil, fmt.Errorf("invalid status line: %s", line)
225 }
226 path := line[3:]
227 status := line[:2]
228 absPath := r.absPath(path)
229 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
230 }
231 return statuses, nil
232}
233
234// CurrentCommit retrieves the current git commit hash
235func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
236 return r.ResolveCommit(ctx, "HEAD")
237}
238
239func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
240 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
241 cmd.Dir = r.repoRoot
242 out, err := cmd.CombinedOutput()
243 if err != nil {
244 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
245 }
246 return strings.TrimSpace(string(out)), nil
247}
248
249func (r *CodeReviewer) absPath(relPath string) string {
250 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
251}
252
253// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
254func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
255 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
256 cmd.Dir = r.repoRoot
257 out, err := cmd.CombinedOutput()
258 if err != nil {
259 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
260 }
261 status := strings.TrimSpace(string(out))
262 if status == "" {
263 return "", fmt.Errorf("no status found for file: %s", file)
264 }
265 return string(status[0]), nil
266}
267
268// getFileContentAtCommit retrieves file content at a specific commit
269func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
270 relFile, err := filepath.Rel(r.repoRoot, file)
271 if err != nil {
272 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
273 file = relFile
274 }
275 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
276 cmd.Dir = r.repoRoot
277 out, err := cmd.CombinedOutput()
278 if err != nil {
279 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
280 }
281 return out, nil
282}
283
284// runFormatter runs the specified formatter on a file and returns the results.
285// A nil result indicates that the file is unchanged, or that an error occurred.
286func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
287 if formatter == "" {
288 return nil // no formatter
289 }
290 // Run the formatter and capture the output
291 cmd := exec.CommandContext(ctx, formatter)
292 cmd.Dir = r.repoRoot
293 cmd.Stdin = bytes.NewReader(content)
294 out, err := cmd.CombinedOutput()
295 if err != nil {
296 // probably a parse error, err on the side of safety
297 return nil
298 }
299 if bytes.Equal(content, out) {
300 return nil // no changes
301 }
302 return out
303}
304
305// formatterWouldChange reports whether a formatter would make changes to the content.
306// If the contents are invalid, it returns false.
307// It works by piping the content to the formatter with the -l flag.
308func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
309 cmd := exec.CommandContext(ctx, formatter, "-l")
310 cmd.Dir = r.repoRoot
311 cmd.Stdin = bytes.NewReader(content)
312 out, err := cmd.CombinedOutput()
313 if err != nil {
314 // probably a parse error, err on the side of safety
315 return false
316 }
317
318 // If the output is empty, the file passes the formatter
319 // If the output contains "<standard input>", the file would be changed
320 return len(bytes.TrimSpace(out)) > 0
321}
322
323// pickFormatter picks a formatter to use for code.
324// If something goes wrong, it recommends no formatter (empty string).
325func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
326 // Test each formatter from strictest to least strict.
327 // Keep the first one that doesn't make changes.
328 formatters := []string{"gofumpt", "goimports", "gofmt"}
329 for _, formatter := range formatters {
330 if r.formatterWouldChange(ctx, formatter, code) {
331 continue
332 }
333 return formatter
334 }
335 return "" // no safe formatter found
336}
337
338// changedFiles retrieves a list of all files changed between two commits
339func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
340 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
341 cmd.Dir = r.repoRoot
342 out, err := cmd.CombinedOutput()
343 if err != nil {
344 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
345 }
346 var files []string
347 for line := range strings.Lines(string(out)) {
348 line = strings.TrimSpace(line)
349 if len(line) == 0 {
350 continue
351 }
352 path := r.absPath(line)
353 if r.initialStatusesContainFile(path) {
354 continue
355 }
356 files = append(files, path)
357 }
358 return files, nil
359}
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000360
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000361// ModTidy runs go mod tidy if go module files have changed.
362// Returns a list of files changed by go mod tidy (empty if none).
363func (r *CodeReviewer) ModTidy(ctx context.Context) ([]string, error) {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000364 err := r.requireHEADDescendantOfInitialCommit(ctx)
365 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000366 return nil, fmt.Errorf("cannot run ModTidy: %w", err)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000367 }
368
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000369 // Check if any go.mod, go.sum, etc. files have changed
370 currentCommit, err := r.CurrentCommit(ctx)
371 if err != nil {
372 return nil, fmt.Errorf("failed to get current commit: %w", err)
373 }
374
375 changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
376 if err != nil {
377 return nil, fmt.Errorf("failed to get changed files: %w", err)
378 }
379
380 // Check if any of the changed files are go module files
381 goModsChanged := false
382 for _, file := range changedFiles {
383 if isGoModFile(file) {
384 goModsChanged = true
385 break
386 }
387 }
388
389 if !goModsChanged {
390 // No go module files changed, so don't run go mod tidy
391 return nil, nil
392 }
393
394 // Run go mod tidy
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000395 cmd := exec.CommandContext(ctx, "go", "mod", "tidy")
396 cmd.Dir = r.repoRoot
397 out, err := cmd.CombinedOutput()
398 if err != nil {
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000399 return nil, fmt.Errorf("go mod tidy failed: %w\n%s", err, out)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000400 }
401
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000402 // Check which files were changed by go mod tidy
403 statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
404 statusCmd.Dir = r.repoRoot
405 statusOut, err := statusCmd.CombinedOutput()
406 if err != nil {
407 return nil, fmt.Errorf("unable to get git status: %w", err)
408 }
409
410 var changedByTidy []string
411
412 for line := range strings.Lines(string(statusOut)) {
413 if len(line) <= 3 {
414 // empty line, defensiveness to avoid panics
415 continue
416 }
417 file := line[3:]
418 if !isGoModFile(file) {
419 continue
420 }
421 path := filepath.Join(r.repoRoot, file)
422 changedByTidy = append(changedByTidy, path)
423 }
424
425 return changedByTidy, nil
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000426}
427
428// RunMechanicalChecks runs all mechanical checks and returns a message describing any changes made.
429func (r *CodeReviewer) RunMechanicalChecks(ctx context.Context) string {
430 var actions []string
431
432 changed := r.autoformat(ctx)
433 if len(changed) > 0 {
434 actions = append(actions, "autoformatters")
435 }
436
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000437 // Run go mod tidy (only if go module files have changed)
438 tidyChanges, err := r.ModTidy(ctx)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000439 if err != nil {
440 slog.WarnContext(ctx, "CodeReviewer.RunMechanicalChecks: ModTidy failed", "err", err)
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000441 }
442 if len(tidyChanges) > 0 {
443 changed = append(changed, tidyChanges...)
444 actions = append(actions, "`go mod tidy`")
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000445 }
446
447 if len(changed) == 0 {
448 return ""
449 }
450
451 slices.Sort(changed)
452
453 msg := fmt.Sprintf(`I ran %s, which updated these files:
454
455%s
456
457Please amend your latest git commit with these changes and then continue with what you were doing.`,
458 strings.Join(actions, " and "),
459 strings.Join(changed, "\n"),
460 )
461
462 return msg
463}
Josh Bleecher Snyder1ed1cc42025-05-07 20:21:40 +0000464
465// isGoModFile returns true if the file is a Go module file (go.mod, go.sum, etc.)
466func isGoModFile(path string) bool {
467 basename := filepath.Base(path)
468 return strings.HasPrefix(basename, "go.")
469}