blob: f305b4b0cf47901ba3dd7fd9a53b5bee342ccc51 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "bytes"
5 "context"
6 "fmt"
7 "log/slog"
8 "os"
9 "os/exec"
10 "path/filepath"
11 "strings"
12)
13
14// A CodeReviewer manages quality checks.
15type CodeReviewer struct {
16 repoRoot string
17 initialCommit string
18 initialStatus []fileStatus // git status of files at initial commit, absolute paths
19 reviewed []string // history of all commits which have been reviewed
20 initialWorktree string // git worktree at initial commit, absolute path
21}
22
23func NewCodeReviewer(ctx context.Context, repoRoot, initialCommit string) (*CodeReviewer, error) {
24 r := &CodeReviewer{
25 repoRoot: repoRoot,
26 initialCommit: initialCommit,
27 }
28 if r.repoRoot == "" {
29 return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
30 }
31 if r.initialCommit == "" {
32 return nil, fmt.Errorf("NewCodeReviewer: initialCommit must be non-empty")
33 }
34 // Confirm that root is in fact the git repo root.
35 root, err := findRepoRoot(r.repoRoot)
36 if err != nil {
37 return nil, err
38 }
39 if root != r.repoRoot {
40 return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
41 }
42
43 // Get an initial list of dirty and untracked files.
44 // We'll filter them out later when deciding whether the worktree is clean.
45 status, err := r.repoStatus(ctx)
46 if err != nil {
47 return nil, err
48 }
49 r.initialStatus = status
50 return r, nil
51}
52
53// Autoformat formats all files changed in HEAD.
54// It returns a list of all files that were formatted.
55// It is best-effort only.
56func (r *CodeReviewer) Autoformat(ctx context.Context) []string {
57 // Refuse to format if HEAD == r.InitialCommit
58 head, err := r.CurrentCommit(ctx)
59 if err != nil {
60 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
61 return nil
62 }
63 parent, err := r.ResolveCommit(ctx, "HEAD^1")
64 if err != nil {
65 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
66 return nil
67 }
68 if head == r.initialCommit {
69 slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format because HEAD == InitialCommit")
70 return nil
71 }
72 // Retrieve a list of all files changed
73 // TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
74 changedFiles, err := r.changedFiles(ctx, r.initialCommit, head)
75 if err != nil {
76 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
77 return nil
78 }
79
80 // General strategy: For all changed files,
81 // run the strictest formatter that passes on the original version.
82 // TODO: add non-Go formatters?
83 // TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
84 var fmtFiles []string
85 for _, file := range changedFiles {
86 if !strings.HasSuffix(file, ".go") {
87 continue
88 }
89 fileStatus, err := r.gitFileStatus(ctx, file)
90 if err != nil {
91 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
92 continue
93 }
94 if fileStatus == "D" { // deleted, nothing to format
95 continue
96 }
97 code, err := r.getFileContentAtCommit(ctx, file, head)
98 if err != nil {
99 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
100 continue
101 }
102 if isAutogeneratedGoFile(code) { // leave autogenerated files alone
103 continue
104 }
105 onDisk, err := os.ReadFile(file)
106 if err != nil {
107 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
108 continue
109 }
110 if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
111 slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
112 continue
113 }
114 var formatterToUse string
115 if fileStatus == "A" {
116 formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
117 } else {
118 prev, err := r.getFileContentAtCommit(ctx, file, parent)
119 if err != nil {
120 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
121 continue
122 }
123 formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
124 }
125
126 // Apply the chosen formatter to the current file
127 newCode := r.runFormatter(ctx, formatterToUse, code)
128 if newCode == nil { // no changes made
129 continue
130 }
131 // write to disk
132 if err := os.WriteFile(file, newCode, 0o600); err != nil {
133 slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
134 continue
135 }
136 fmtFiles = append(fmtFiles, file)
137 }
138 return fmtFiles
139}
140
141// RequireNormalGitState checks that the git repo state is pretty normal.
142func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
143 rebaseDirs := []string{"rebase-merge", "rebase-apply"}
144 for _, dir := range rebaseDirs {
145 _, err := os.Stat(filepath.Join(r.repoRoot, dir))
146 if err == nil {
147 return fmt.Errorf("git repo is not clean: rebase in progress")
148 }
149 }
150 filesReason := map[string]string{
151 "MERGE_HEAD": "merge is in progress",
152 "CHERRY_PICK_HEAD": "cherry-pick is in progress",
153 "REVERT_HEAD": "revert is in progress",
154 "BISECT_LOG": "bisect is in progress",
155 }
156 for file, reason := range filesReason {
157 _, err := os.Stat(filepath.Join(r.repoRoot, file))
158 if err == nil {
159 return fmt.Errorf("git repo is not clean: %s", reason)
160 }
161 }
162 return nil
163}
164
165func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
166 // Check that there are no uncommitted changes, whether staged or not.
167 // (Changes in r.initialStatus are OK, no other changes are.)
168 statuses, err := r.repoStatus(ctx)
169 if err != nil {
170 return fmt.Errorf("unable to get repo status: %w", err)
171 }
172 uncommitted := new(strings.Builder)
173 for _, status := range statuses {
174 if !r.initialStatusesContainFile(status.Path) {
175 fmt.Fprintf(uncommitted, "%s %s\n", status.Path, status.RawStatus)
176 }
177 }
178 if uncommitted.Len() > 0 {
179 return fmt.Errorf("uncommitted changes in repo, please commit or revert:\n%s", uncommitted.String())
180 }
181 return nil
182}
183
184func (r *CodeReviewer) initialStatusesContainFile(file string) bool {
185 for _, s := range r.initialStatus {
186 if s.Path == file {
187 return true
188 }
189 }
190 return false
191}
192
// fileStatus pairs a file path with its raw git status code.
type fileStatus struct {
	// Path is the file the status applies to.
	Path string
	// RawStatus is the status code; always 2 characters.
	RawStatus string
}
197
198func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
199 // Run git status --porcelain, split into lines
200 cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
201 cmd.Dir = r.repoRoot
202 out, err := cmd.CombinedOutput()
203 if err != nil {
204 return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
205 }
206 var statuses []fileStatus
207 for line := range strings.Lines(string(out)) {
208 if len(line) == 0 {
209 continue
210 }
211 if len(line) < 3 {
212 return nil, fmt.Errorf("invalid status line: %s", line)
213 }
214 path := line[3:]
215 status := line[:2]
216 absPath := r.absPath(path)
217 statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
218 }
219 return statuses, nil
220}
221
222// CurrentCommit retrieves the current git commit hash
223func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
224 return r.ResolveCommit(ctx, "HEAD")
225}
226
227func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
228 cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
229 cmd.Dir = r.repoRoot
230 out, err := cmd.CombinedOutput()
231 if err != nil {
232 return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
233 }
234 return strings.TrimSpace(string(out)), nil
235}
236
237func (r *CodeReviewer) absPath(relPath string) string {
238 return filepath.Clean(filepath.Join(r.repoRoot, relPath))
239}
240
241// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
242func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
243 cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.initialCommit, "HEAD", "--", file)
244 cmd.Dir = r.repoRoot
245 out, err := cmd.CombinedOutput()
246 if err != nil {
247 return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
248 }
249 status := strings.TrimSpace(string(out))
250 if status == "" {
251 return "", fmt.Errorf("no status found for file: %s", file)
252 }
253 return string(status[0]), nil
254}
255
256// getFileContentAtCommit retrieves file content at a specific commit
257func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
258 relFile, err := filepath.Rel(r.repoRoot, file)
259 if err != nil {
260 slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
261 file = relFile
262 }
263 cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
264 cmd.Dir = r.repoRoot
265 out, err := cmd.CombinedOutput()
266 if err != nil {
267 return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
268 }
269 return out, nil
270}
271
272// runFormatter runs the specified formatter on a file and returns the results.
273// A nil result indicates that the file is unchanged, or that an error occurred.
274func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
275 if formatter == "" {
276 return nil // no formatter
277 }
278 // Run the formatter and capture the output
279 cmd := exec.CommandContext(ctx, formatter)
280 cmd.Dir = r.repoRoot
281 cmd.Stdin = bytes.NewReader(content)
282 out, err := cmd.CombinedOutput()
283 if err != nil {
284 // probably a parse error, err on the side of safety
285 return nil
286 }
287 if bytes.Equal(content, out) {
288 return nil // no changes
289 }
290 return out
291}
292
293// formatterWouldChange reports whether a formatter would make changes to the content.
294// If the contents are invalid, it returns false.
295// It works by piping the content to the formatter with the -l flag.
296func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
297 cmd := exec.CommandContext(ctx, formatter, "-l")
298 cmd.Dir = r.repoRoot
299 cmd.Stdin = bytes.NewReader(content)
300 out, err := cmd.CombinedOutput()
301 if err != nil {
302 // probably a parse error, err on the side of safety
303 return false
304 }
305
306 // If the output is empty, the file passes the formatter
307 // If the output contains "<standard input>", the file would be changed
308 return len(bytes.TrimSpace(out)) > 0
309}
310
311// pickFormatter picks a formatter to use for code.
312// If something goes wrong, it recommends no formatter (empty string).
313func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
314 // Test each formatter from strictest to least strict.
315 // Keep the first one that doesn't make changes.
316 formatters := []string{"gofumpt", "goimports", "gofmt"}
317 for _, formatter := range formatters {
318 if r.formatterWouldChange(ctx, formatter, code) {
319 continue
320 }
321 return formatter
322 }
323 return "" // no safe formatter found
324}
325
326// changedFiles retrieves a list of all files changed between two commits
327func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
328 cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
329 cmd.Dir = r.repoRoot
330 out, err := cmd.CombinedOutput()
331 if err != nil {
332 return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
333 }
334 var files []string
335 for line := range strings.Lines(string(out)) {
336 line = strings.TrimSpace(line)
337 if len(line) == 0 {
338 continue
339 }
340 path := r.absPath(line)
341 if r.initialStatusesContainFile(path) {
342 continue
343 }
344 files = append(files, path)
345 }
346 return files, nil
347}