blob: 2a9f9ea6156c14b72bdc09c53ecb33b77fa85a78 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
11 "strings"
12)
13
14// A CodeReviewer manages quality checks.
15type CodeReviewer struct {
16 repoRoot string
17 initialCommit string
18 initialStatus []fileStatus // git status of files at initial commit, absolute paths
19 reviewed []string // history of all commits which have been reviewed
20 initialWorktree string // git worktree at initial commit, absolute path
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070021 llmReview bool // enables a subagent LLM review
Earl Lee2e463fb2025-04-17 11:22:22 -070022}
23
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070024const (
25 NoLLMReview = false
26 DoLLMReview = true
27)
28
29func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string, llmReview bool) (*CodeReviewer, error) {
Earl Lee2e463fb2025-04-17 11:22:22 -070030 r := &CodeReviewer{
31 repoRoot: repoRoot,
32 initialCommit: initialCommit,
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -070033 llmReview: llmReview,
Earl Lee2e463fb2025-04-17 11:22:22 -070034 }
35 if r.repoRoot == "" {
36 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
37 }
38 if r.initialCommit == "" {
39 return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
40 }
41 // Confirm that root is in fact the git repo root.
42 root, err := findRepoRoot(r.repoRoot)
43 if err != nil {
44 return nil, err
45 }
46 if root != r.repoRoot {
47 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
48 }
49
50 // Get an initial list of dirty and untracked files.
51 // We'll filter them out later when deciding whether the worktree is clean.
52 status, err := r.repoStatus(ctx)
53 if err != nil {
54 return nil, err
55 }
56 r.initialStatus = status
57 return r, nil
58}
59
60// Autoformat formats all files changed in HEAD.
61// It returns a list of all files that were formatted.
62// It is best-effort only.
63func (r *CodeReviewer) Autoformat(ctx context.Context) []string {
64 // Refuse to format if HEAD == r.InitialCommit
65 head, err := r.CurrentCommit(ctx)
66 if err != nil {
67 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
68 return nil
69 }
70 parent, err := r.ResolveCommit(ctx, "HEAD^1")
71 if err != nil {
72 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
73 return nil
74 }
75 if head == r.initialCommit {
76 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format because HEAD == InitialCommit")
77 return nil
78 }
79 // Retrieve a list of all files changed
80 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
81 changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
82 if err != nil {
83 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
84 return nil
85 }
86
87 // General strategy: For all changed files,
88 // run the strictest formatter that passes on the original version.
89 // TODO: add non-Go formatters?
90 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
91 var fmtFiles []string
92 for _, file := range changedFiles {
93 if !strings.HasSuffix(file, ".go") {
94 continue
95 }
96 fileStatus, err := r.gitFileStatus(ctx, file)
97 if err != nil {
98 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
99 continue
100 }
101 if fileStatus == "D" { // deleted, nothing to format
102 continue
103 }
104 code, err := r.getFileContentAtCommit(ctx, file, head)
105 if err != nil {
106 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
107 continue
108 }
109 if isAutogeneratedGoFile(code) { // leave autogenerated files alone
110 continue
111 }
112 onDisk, err := os.ReadFile(file)
113 if err != nil {
114 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
115 continue
116 }
117 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
118 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
119 continue
120 }
121 var formatterToUse string
122 if fileStatus == "A" {
123 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
124 } else {
125 prev, err := r.getFileContentAtCommit(ctx, file, parent)
126 if err != nil {
127 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
128 continue
129 }
130 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
131 }
132
133 // Apply the chosen formatter to the current file
134 newCode := r.runFormatter(ctx, formatterToUse, code)
135 if newCode == nil { // no changes made
136 continue
137 }
138 // write to disk
139 if err := os.WriteFile(file, newCode, 0o600); err != nil {
140 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
141 continue
142 }
143 fmtFiles = append(fmtFiles, file)
144 }
145 return fmtFiles
146}
147
148// RequireNormalGitState checks that the git repo state is pretty normal.
149func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
150 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
151 for _, dir := range rebaseDirs {
152 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
153 if err == nil {
154 return fmt.Errorf("git repo is not clean: rebase in progress")
155 }
156 }
157 filesReason := map[string]string{
158 "MERGE_HEAD": "merge is in progress",
159 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
160 "REVERT_HEAD": "revert is in progress",
161 "BISECT_LOG": "bisect is in progress",
162 }
163 for file, reason := range filesReason {
164 _, err := os.Stat(filepath.Join(r.repoRoot, file))
165 if err == nil {
166 return fmt.Errorf("git repo is not clean: %s", reason)
167 }
168 }
169 return nil
170}
171
172func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
173 // Check that there are no uncommitted changes, whether staged or not.
174 // (Changes in r.initialStatus are OK, no other changes are.)
175 statuses, err := r.repoStatus(ctx)
176 if err != nil {
177 return fmt.Errorf("unable to get repo status: %w", err)
178 }
179 uncommitted := new(strings.Builder)
180 for _, status := range statuses {
181 if !r.initialStatusesContainFile(status.Path) {
182 fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
183 }
184 }
185 if uncommitted.Len() > 0 {
186 return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
187 }
188 return nil
189}
190
191func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
192 for _, s := range r.initialStatus {
193 if s.Path == file {
194 return true
195 }
196 }
197 return false
198}
199
200type fileStatus struct {
201 Path string
202 RawStatus string // always 2 characters
203}
204
205func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
206 // Run git status --porcelain, split into lines
207 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
208 cmd.Dir = r.repoRoot
209 out, err := cmd.CombinedOutput()
210 if err != nil {
211 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
212 }
213 var statuses []fileStatus
214 for line := range strings.Lines(string(out)) {
215 if len(line) == 0 {
216 continue
217 }
218 if len(line) < 3 {
219 return nil, fmt.Errorf("invalid status line: %s", line)
220 }
221 path := line[3:]
222 status := line[:2]
223 absPath := r.absPath(path)
224 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
225 }
226 return statuses, nil
227}
228
229// CurrentCommit retrieves the current git commit hash
230func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
231 return r.ResolveCommit(ctx, "HEAD")
232}
233
234func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
235 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
236 cmd.Dir = r.repoRoot
237 out, err := cmd.CombinedOutput()
238 if err != nil {
239 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
240 }
241 return strings.TrimSpace(string(out)), nil
242}
243
244func (r *CodeReviewer) absPath(relPath string) string {
245 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
246}
247
248// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
249func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
250 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
251 cmd.Dir = r.repoRoot
252 out, err := cmd.CombinedOutput()
253 if err != nil {
254 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
255 }
256 status := strings.TrimSpace(string(out))
257 if status == "" {
258 return "", fmt.Errorf("no status found for file: %s", file)
259 }
260 return string(status[0]), nil
261}
262
263// getFileContentAtCommit retrieves file content at a specific commit
264func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
265 relFile, err := filepath.Rel(r.repoRoot, file)
266 if err != nil {
267 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
268 file = relFile
269 }
270 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
271 cmd.Dir = r.repoRoot
272 out, err := cmd.CombinedOutput()
273 if err != nil {
274 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
275 }
276 return out, nil
277}
278
279// runFormatter runs the specified formatter on a file and returns the results.
280// A nil result indicates that the file is unchanged, or that an error occurred.
281func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
282 if formatter == "" {
283 return nil // no formatter
284 }
285 // Run the formatter and capture the output
286 cmd := exec.CommandContext(ctx, formatter)
287 cmd.Dir = r.repoRoot
288 cmd.Stdin = bytes.NewReader(content)
289 out, err := cmd.CombinedOutput()
290 if err != nil {
291 // probably a parse error, err on the side of safety
292 return nil
293 }
294 if bytes.Equal(content, out) {
295 return nil // no changes
296 }
297 return out
298}
299
300// formatterWouldChange reports whether a formatter would make changes to the content.
301// If the contents are invalid, it returns false.
302// It works by piping the content to the formatter with the -l flag.
303func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
304 cmd := exec.CommandContext(ctx, formatter, "-l")
305 cmd.Dir = r.repoRoot
306 cmd.Stdin = bytes.NewReader(content)
307 out, err := cmd.CombinedOutput()
308 if err != nil {
309 // probably a parse error, err on the side of safety
310 return false
311 }
312
313 // If the output is empty, the file passes the formatter
314 // If the output contains "<standard input>", the file would be changed
315 return len(bytes.TrimSpace(out)) > 0
316}
317
318// pickFormatter picks a formatter to use for code.
319// If something goes wrong, it recommends no formatter (empty string).
320func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
321 // Test each formatter from strictest to least strict.
322 // Keep the first one that doesn't make changes.
323 formatters := []string{"gofumpt", "goimports", "gofmt"}
324 for _, formatter := range formatters {
325 if r.formatterWouldChange(ctx, formatter, code) {
326 continue
327 }
328 return formatter
329 }
330 return "" // no safe formatter found
331}
332
333// changedFiles retrieves a list of all files changed between two commits
334func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
335 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
336 cmd.Dir = r.repoRoot
337 out, err := cmd.CombinedOutput()
338 if err != nil {
339 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
340 }
341 var files []string
342 for line := range strings.Lines(string(out)) {
343 line = strings.TrimSpace(line)
344 if len(line) == 0 {
345 continue
346 }
347 path := r.absPath(line)
348 if r.initialStatusesContainFile(path) {
349 continue
350 }
351 files = append(files, path)
352 }
353 return files, nil
354}