blob: bf118eeffa3282401061735dfc7f2ac0fbe912a5 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
11 "strings"
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000012
13 "sketch.dev/claudetool"
Earl Lee2e463fb2025-04-17 11:22:22 -070014)
15
16// A CodeReviewer manages quality checks.
17type CodeReviewer struct {
18 repoRoot string
19 initialCommit string
20 initialStatus []fileStatus // git status of files at initial commit, absolute paths
21 reviewed []string // history of all commits which have been reviewed
22 initialWorktree string // git worktree at initial commit, absolute path
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070023 llmReview bool // enables a subagent LLM review
Earl Lee2e463fb2025-04-17 11:22:22 -070024}
25
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070026const (
27 NoLLMReview = false
28 DoLLMReview = true
29)
30
31func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string, llmReview bool) (*CodeReviewer, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -070032 r := &CodeReviewer{
33 repoRoot: repoRoot,
34 initialCommit: initialCommit,
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070035 llmReview: llmReview,
Earl Lee2e463fb2025-04-17 11:22:22 -070036 }
37 if r.repoRoot == "" {
38 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
39 }
40 if r.initialCommit == "" {
41 return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
42 }
43 // Confirm that root is in fact the git repo root.
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000044 root, err := claudetool.FindRepoRoot(r.repoRoot)
Earl Lee2e463fb2025-04-17 11:22:22 -070045 if err != nil {
46 return nil, err
47 }
48 if root != r.repoRoot {
49 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
50 }
51
52 // Get an initial list of dirty and untracked files.
53 // We'll filter them out later when deciding whether the worktree is clean.
54 status, err := r.repoStatus(ctx)
55 if err != nil {
56 return nil, err
57 }
58 r.initialStatus = status
59 return r, nil
60}
61
62// Autoformat formats all files changed in HEAD.
63// It returns a list of all files that were formatted.
64// It is best-effort only.
65func (r *CodeReviewer) Autoformat(ctx context.Context) []string {
66 // Refuse to format if HEAD == r.InitialCommit
67 head, err := r.CurrentCommit(ctx)
68 if err != nil {
69 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
70 return nil
71 }
72 parent, err := r.ResolveCommit(ctx, "HEAD^1")
73 if err != nil {
74 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
75 return nil
76 }
77 if head == r.initialCommit {
78 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format because HEAD == InitialCommit")
79 return nil
80 }
81 // Retrieve a list of all files changed
82 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
83 changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
84 if err != nil {
85 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
86 return nil
87 }
88
89 // General strategy: For all changed files,
90 // run the strictest formatter that passes on the original version.
91 // TODO: add non-Go formatters?
92 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
93 var fmtFiles []string
94 for _, file := range changedFiles {
95 if !strings.HasSuffix(file, ".go") {
96 continue
97 }
98 fileStatus, err := r.gitFileStatus(ctx, file)
99 if err != nil {
100 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
101 continue
102 }
103 if fileStatus == "D" { // deleted, nothing to format
104 continue
105 }
106 code, err := r.getFileContentAtCommit(ctx, file, head)
107 if err != nil {
108 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
109 continue
110 }
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000111 if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
Earl Lee2e463fb2025-04-17 11:22:22 -0700112 continue
113 }
114 onDisk, err := os.ReadFile(file)
115 if err != nil {
116 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
117 continue
118 }
119 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
120 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
121 continue
122 }
123 var formatterToUse string
124 if fileStatus == "A" {
125 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
126 } else {
127 prev, err := r.getFileContentAtCommit(ctx, file, parent)
128 if err != nil {
129 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
130 continue
131 }
132 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
133 }
134
135 // Apply the chosen formatter to the current file
136 newCode := r.runFormatter(ctx, formatterToUse, code)
137 if newCode == nil { // no changes made
138 continue
139 }
140 // write to disk
141 if err := os.WriteFile(file, newCode, 0o600); err != nil {
142 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
143 continue
144 }
145 fmtFiles = append(fmtFiles, file)
146 }
147 return fmtFiles
148}
149
150// RequireNormalGitState checks that the git repo state is pretty normal.
151func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
152 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
153 for _, dir := range rebaseDirs {
154 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
155 if err == nil {
156 return fmt.Errorf("git repo is not clean: rebase in progress")
157 }
158 }
159 filesReason := map[string]string{
160 "MERGE_HEAD": "merge is in progress",
161 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
162 "REVERT_HEAD": "revert is in progress",
163 "BISECT_LOG": "bisect is in progress",
164 }
165 for file, reason := range filesReason {
166 _, err := os.Stat(filepath.Join(r.repoRoot, file))
167 if err == nil {
168 return fmt.Errorf("git repo is not clean: %s", reason)
169 }
170 }
171 return nil
172}
173
174func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
175 // Check that there are no uncommitted changes, whether staged or not.
176 // (Changes in r.initialStatus are OK, no other changes are.)
177 statuses, err := r.repoStatus(ctx)
178 if err != nil {
179 return fmt.Errorf("unable to get repo status: %w", err)
180 }
181 uncommitted := new(strings.Builder)
182 for _, status := range statuses {
183 if !r.initialStatusesContainFile(status.Path) {
184 fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
185 }
186 }
187 if uncommitted.Len() > 0 {
188 return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
189 }
190 return nil
191}
192
193func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
194 for _, s := range r.initialStatus {
195 if s.Path == file {
196 return true
197 }
198 }
199 return false
200}
201
202type fileStatus struct {
203 Path string
204 RawStatus string // always 2 characters
205}
206
207func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
208 // Run git status --porcelain, split into lines
209 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
210 cmd.Dir = r.repoRoot
211 out, err := cmd.CombinedOutput()
212 if err != nil {
213 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
214 }
215 var statuses []fileStatus
216 for line := range strings.Lines(string(out)) {
217 if len(line) == 0 {
218 continue
219 }
220 if len(line) < 3 {
221 return nil, fmt.Errorf("invalid status line: %s", line)
222 }
223 path := line[3:]
224 status := line[:2]
225 absPath := r.absPath(path)
226 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
227 }
228 return statuses, nil
229}
230
231// CurrentCommit retrieves the current git commit hash
232func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
233 return r.ResolveCommit(ctx, "HEAD")
234}
235
236func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
237 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
238 cmd.Dir = r.repoRoot
239 out, err := cmd.CombinedOutput()
240 if err != nil {
241 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
242 }
243 return strings.TrimSpace(string(out)), nil
244}
245
246func (r *CodeReviewer) absPath(relPath string) string {
247 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
248}
249
250// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
251func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
252 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
253 cmd.Dir = r.repoRoot
254 out, err := cmd.CombinedOutput()
255 if err != nil {
256 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
257 }
258 status := strings.TrimSpace(string(out))
259 if status == "" {
260 return "", fmt.Errorf("no status found for file: %s", file)
261 }
262 return string(status[0]), nil
263}
264
265// getFileContentAtCommit retrieves file content at a specific commit
266func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
267 relFile, err := filepath.Rel(r.repoRoot, file)
268 if err != nil {
269 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
270 file = relFile
271 }
272 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
273 cmd.Dir = r.repoRoot
274 out, err := cmd.CombinedOutput()
275 if err != nil {
276 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
277 }
278 return out, nil
279}
280
281// runFormatter runs the specified formatter on a file and returns the results.
282// A nil result indicates that the file is unchanged, or that an error occurred.
283func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
284 if formatter == "" {
285 return nil // no formatter
286 }
287 // Run the formatter and capture the output
288 cmd := exec.CommandContext(ctx, formatter)
289 cmd.Dir = r.repoRoot
290 cmd.Stdin = bytes.NewReader(content)
291 out, err := cmd.CombinedOutput()
292 if err != nil {
293 // probably a parse error, err on the side of safety
294 return nil
295 }
296 if bytes.Equal(content, out) {
297 return nil // no changes
298 }
299 return out
300}
301
302// formatterWouldChange reports whether a formatter would make changes to the content.
303// If the contents are invalid, it returns false.
304// It works by piping the content to the formatter with the -l flag.
305func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
306 cmd := exec.CommandContext(ctx, formatter, "-l")
307 cmd.Dir = r.repoRoot
308 cmd.Stdin = bytes.NewReader(content)
309 out, err := cmd.CombinedOutput()
310 if err != nil {
311 // probably a parse error, err on the side of safety
312 return false
313 }
314
315 // If the output is empty, the file passes the formatter
316 // If the output contains "<standard input>", the file would be changed
317 return len(bytes.TrimSpace(out)) > 0
318}
319
320// pickFormatter picks a formatter to use for code.
321// If something goes wrong, it recommends no formatter (empty string).
322func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
323 // Test each formatter from strictest to least strict.
324 // Keep the first one that doesn't make changes.
325 formatters := []string{"gofumpt", "goimports", "gofmt"}
326 for _, formatter := range formatters {
327 if r.formatterWouldChange(ctx, formatter, code) {
328 continue
329 }
330 return formatter
331 }
332 return "" // no safe formatter found
333}
334
335// changedFiles retrieves a list of all files changed between two commits
336func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
337 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
338 cmd.Dir = r.repoRoot
339 out, err := cmd.CombinedOutput()
340 if err != nil {
341 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
342 }
343 var files []string
344 for line := range strings.Lines(string(out)) {
345 line = strings.TrimSpace(line)
346 if len(line) == 0 {
347 continue
348 }
349 path := r.absPath(line)
350 if r.initialStatusesContainFile(path) {
351 continue
352 }
353 files = append(files, path)
354 }
355 return files, nil
356}