blob: 19741746e3f023d351fe8eb7d37c03e1def3557f [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
23 "sketch.dev/ant"
24)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
29func (r *CodeReviewer) Tool() *ant.Tool {
30 spec := &ant.Tool{
31 Name: "codereview",
32 Description: `Run an automated code review.`,
33 // If you modify this, update the termui template for prettier rendering.
34 InputSchema: ant.MustSchema(`{"type": "object"}`),
35 Run: r.Run,
36 }
37 return spec
38}
39
40func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
41 if err := r.RequireNormalGitState(ctx); err != nil {
42 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
43 return "", err
44 }
45 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
46 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
47 return "", err
48 }
49
50 // Check that the current commit is not the initial commit
51 currentCommit, err := r.CurrentCommit(ctx)
52 if err != nil {
53 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
54 return "", err
55 }
56 if r.IsInitialCommit(currentCommit) {
57 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
58 return "", fmt.Errorf("no new commits have been added, nothing to review")
59 }
60
61 // No matter what failures happen from here out, we will declare this to have been reviewed.
62 // This should help avoid the model getting blocked by a broken code review tool.
63 r.reviewed = append(r.reviewed, currentCommit)
64
65 changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
66 if err != nil {
67 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
68 return "", err
69 }
70
71 // Prepare to analyze before/after for the impacted files.
72 // We use the current commit to determine what packages exist and are impacted.
73 // The packages in the initial commit may be different.
74 // Good enough for now.
75 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000076 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070077 if err != nil {
78 // TODO: log and skip to stuff that doesn't require packages
79 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
80 return "", err
81 }
82 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070083
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000084 var errorMessages []string // problems we want the model to address
85 var infoMessages []string // info the model should consider
86
87 // Find potentially related files that should also be considered
88 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
Josh Bleecher Snyder15272c12025-04-28 19:36:15 -070089 // TODO: make "related files found" w/o other notes be a non-error response
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000090 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
91 if err != nil {
92 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
93 } else {
94 relatedMsg := r.formatRelatedFiles(relatedFiles)
95 if relatedMsg != "" {
96 infoMessages = append(infoMessages, relatedMsg)
97 }
98 }
Earl Lee2e463fb2025-04-17 11:22:22 -070099
100 testMsg, err := r.checkTests(ctx, allPkgList)
101 if err != nil {
102 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
103 return "", err
104 }
105 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000106 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700107 }
108
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000109 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700110 if err != nil {
111 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
112 return "", err
113 }
114 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000115 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700116 }
117
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000118 buf := new(strings.Builder)
119 if len(infoMessages) > 0 {
120 buf.WriteString("# Info\n\n")
121 buf.WriteString(strings.Join(infoMessages, "\n\n"))
122 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700123 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000124 if len(errorMessages) > 0 {
125 buf.WriteString("# Errors\n\n")
126 buf.WriteString(strings.Join(errorMessages, "\n\n"))
127 buf.WriteString("\n\nPlease fix before proceeding.\n")
128 }
129 if buf.Len() == 0 {
130 buf.WriteString("OK")
131 }
132 return buf.String(), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700133}
134
135func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
136 if r.initialWorktree != "" {
137 return nil
138 }
139 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
140 if err != nil {
141 return err
142 }
143 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
144 worktreeCmd.Dir = r.repoRoot
145 out, err := worktreeCmd.CombinedOutput()
146 if err != nil {
147 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
148 }
149 r.initialWorktree = tmpDir
150 return nil
151}
152
153func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
154 goTestArgs := []string{"test", "-json", "-v"}
155 goTestArgs = append(goTestArgs, pkgList...)
156
157 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
158 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000159 afterTestOut, _ := afterTestCmd.Output()
160 // unfortunately, we can't short-circuit here even if all tests pass,
161 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700162
163 err := r.initializeInitialCommitWorktree(ctx)
164 if err != nil {
165 return "", err
166 }
167
168 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
169 beforeTestCmd.Dir = r.initialWorktree
170 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
171
172 // Parse the jsonl test results
173 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
174 if beforeParseErr != nil {
175 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
176 }
177 afterResults, afterParseErr := parseTestResults(afterTestOut)
178 if afterParseErr != nil {
179 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
180 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700181 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
182 if err != nil {
183 return "", fmt.Errorf("failed to compare test results: %w", err)
184 }
185 // TODO: better output formatting?
186 res := r.formatTestRegressions(testRegressions)
187 return res, nil
188}
189
Earl Lee2e463fb2025-04-17 11:22:22 -0700190// GoplsIssue represents a single issue reported by gopls check
191type GoplsIssue struct {
192 Position string // File position in format "file:line:col-range"
193 Message string // Description of the issue
194}
195
196// checkGopls runs gopls check on the provided files in both the current and initial state,
197// compares the results, and reports any new issues introduced in the current state.
198func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
199 if len(changedFiles) == 0 {
200 return "", nil // no files to check
201 }
202
203 // Filter out non-Go files as gopls only works on Go files
204 // and verify they still exist (not deleted)
205 var goFiles []string
206 for _, file := range changedFiles {
207 if !strings.HasSuffix(file, ".go") {
208 continue // not a Go file
209 }
210
211 // Check if the file still exists (not deleted)
212 if _, err := os.Stat(file); os.IsNotExist(err) {
213 continue // file doesn't exist anymore (deleted)
214 }
215
216 goFiles = append(goFiles, file)
217 }
218
219 if len(goFiles) == 0 {
220 return "", nil // no Go files to check
221 }
222
223 // Run gopls check on the current state
224 goplsArgs := append([]string{"check"}, goFiles...)
225
226 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
227 afterGoplsCmd.Dir = r.repoRoot
228 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
229 if err != nil {
230 // Check if the output looks like real gopls issues or if it's just error output
231 if !looksLikeGoplsIssues(afterGoplsOut) {
232 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
233 return "", nil // Skip rather than failing the entire code review
234 }
235 // Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
236 }
237
238 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000239 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700240
241 // If no issues were found, we're done
242 if len(afterIssues) == 0 {
243 return "", nil
244 }
245
246 // Gopls detected issues in the current state, check if they existed in the initial state
247 initErr := r.initializeInitialCommitWorktree(ctx)
248 if initErr != nil {
249 return "", err
250 }
251
252 // For each file that exists in the initial commit, run gopls check
253 var initialFilesToCheck []string
254 for _, file := range goFiles {
255 // Get relative path for git operations
256 relFile, err := filepath.Rel(r.repoRoot, file)
257 if err != nil {
258 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
259 continue
260 }
261
262 // Check if the file exists in the initial commit
263 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
264 checkCmd.Dir = r.repoRoot
265 if err := checkCmd.Run(); err == nil {
266 // File exists in initial commit
267 initialFilePath := filepath.Join(r.initialWorktree, relFile)
268 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
269 }
270 }
271
272 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000273 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700274 if len(initialFilesToCheck) > 0 {
275 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
276 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
277 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000278 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700279 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
280 // If gopls fails to run properly on the initial commit, log a warning and continue
281 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000282 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700283 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000284 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700285 }
286 }
287
288 // Find new issues that weren't present in the initial state
289 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
290 if len(goplsRegressions) == 0 {
291 return "", nil // no new issues
292 }
293
294 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000295 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700296}
297
298// parseGoplsOutput parses the text output from gopls check
299// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000300func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700301 var issues []GoplsIssue
302 lines := strings.Split(string(output), "\n")
303
304 for _, line := range lines {
305 line = strings.TrimSpace(line)
306 if line == "" {
307 continue
308 }
309
310 // Skip lines that look like error messages rather than gopls issues
311 if strings.HasPrefix(line, "Error:") ||
312 strings.HasPrefix(line, "Failed:") ||
313 strings.HasPrefix(line, "Warning:") ||
314 strings.HasPrefix(line, "gopls:") {
315 continue
316 }
317
318 // Find the first colon that separates the file path from the line number
319 firstColonIdx := strings.Index(line, ":")
320 if firstColonIdx < 0 {
321 continue // Invalid format
322 }
323
324 // Verify the part before the first colon looks like a file path
325 potentialPath := line[:firstColonIdx]
326 if !strings.HasSuffix(potentialPath, ".go") {
327 continue // Not a Go file path
328 }
329
330 // Find the position of the first message separator ': '
331 // This separates the position info from the message
332 messageStart := strings.Index(line, ": ")
333 if messageStart < 0 || messageStart <= firstColonIdx {
334 continue // Invalid format
335 }
336
337 // Extract position and message
338 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000339 rel, err := filepath.Rel(root, position)
340 if err == nil {
341 position = rel
342 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700343 message := line[messageStart+2:] // Skip the ': ' separator
344
345 // Verify position has the expected format (at least 2 colons for line:col)
346 colonCount := strings.Count(position, ":")
347 if colonCount < 2 {
348 continue // Not enough position information
349 }
350
351 issues = append(issues, GoplsIssue{
352 Position: position,
353 Message: message,
354 })
355 }
356
357 return issues
358}
359
360// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
361// rather than error messages about gopls itself failing
362func looksLikeGoplsIssues(output []byte) bool {
363 // If output is empty, it's not valid issues
364 if len(output) == 0 {
365 return false
366 }
367
368 // Check if output has at least one line that looks like a gopls issue
369 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
370 lines := strings.Split(string(output), "\n")
371 for _, line := range lines {
372 line = strings.TrimSpace(line)
373 if line == "" {
374 continue
375 }
376
377 // A gopls issue has at least two colons (file path, line number, column)
378 // and contains a colon followed by a space (separating position from message)
379 colonCount := strings.Count(line, ":")
380 hasSeparator := strings.Contains(line, ": ")
381
382 if colonCount >= 2 && hasSeparator {
383 // Check if it starts with a likely file path (ending in .go)
384 parts := strings.SplitN(line, ":", 2)
385 if strings.HasSuffix(parts[0], ".go") {
386 return true
387 }
388 }
389 }
390 return false
391}
392
393// normalizeGoplsPosition extracts just the file path from a position string
394func normalizeGoplsPosition(position string) string {
395 // Extract just the file path by taking everything before the first colon
396 parts := strings.Split(position, ":")
397 if len(parts) < 1 {
398 return position
399 }
400 return parts[0]
401}
402
403// findGoplsRegressions identifies gopls issues that are new in the after state
404func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
405 var regressions []GoplsIssue
406
407 // Build map of before issues for easier lookup
408 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
409 for _, issue := range before {
410 file := normalizeGoplsPosition(issue.Position)
411 if _, exists := beforeIssueMap[file]; !exists {
412 beforeIssueMap[file] = make(map[string]bool)
413 }
414 // Store both the exact message and the general issue type for fuzzy matching
415 beforeIssueMap[file][issue.Message] = true
416
417 // Extract the general issue type (everything before the first ':' in the message)
418 generalIssue := issue.Message
419 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
420 generalIssue = issue.Message[:colonIdx]
421 }
422 beforeIssueMap[file][generalIssue] = true
423 }
424
425 // Check each after issue to see if it's new
426 for _, afterIssue := range after {
427 file := normalizeGoplsPosition(afterIssue.Position)
428 isNew := true
429
430 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
431 // Check for exact message match
432 if fileIssues[afterIssue.Message] {
433 isNew = false
434 } else {
435 // Check for general issue type match
436 generalIssue := afterIssue.Message
437 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
438 generalIssue = afterIssue.Message[:colonIdx]
439 }
440 if fileIssues[generalIssue] {
441 isNew = false
442 }
443 }
444 }
445
446 if isNew {
447 regressions = append(regressions, afterIssue)
448 }
449 }
450
451 // Sort regressions for deterministic output
452 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
453 return strings.Compare(a.Position, b.Position)
454 })
455
456 return regressions
457}
458
459// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000460func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700461 if len(regressions) == 0 {
462 return ""
463 }
464
465 var sb strings.Builder
466 sb.WriteString("Gopls check issues detected:\n\n")
467
468 // Format each issue
469 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000470 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700471 }
472
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000473 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
474 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700475 return sb.String()
476}
477
478func (r *CodeReviewer) HasReviewed(commit string) bool {
479 return slices.Contains(r.reviewed, commit)
480}
481
482func (r *CodeReviewer) IsInitialCommit(commit string) bool {
483 return commit == r.initialCommit
484}
485
486// packagesForFiles returns maps of packages related to the given files:
487// 1. directPkgs: packages that directly contain the changed files
488// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
489// It may include false positives.
490// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000491func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700492 for _, f := range files {
493 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000494 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700495 }
496 }
497 cfg := &packages.Config{
498 Mode: packages.LoadImports | packages.NeedEmbedFiles,
499 Context: ctx,
500 // Logf: func(msg string, args ...any) {
501 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
502 // },
503 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
504 // We can cross that bridge when we get there.
505 Dir: r.repoRoot,
506 Tests: true,
507 }
508 universe, err := packages.Load(cfg, "./...")
509 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000510 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700511 }
512 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000513 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700514 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700515 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700516 for _, file := range files {
517 if pkgFiles[file] {
518 // prefer test packages, as they contain strictly more files (right?)
519 prev := directPkgs[pkg.PkgPath]
520 if prev == nil || prev.ForTest == "" {
521 directPkgs[pkg.PkgPath] = pkg
522 }
523 }
524 }
525 }
526
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000527 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700528
529 // Add packages that depend on the direct packages
530 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000531 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700532}
533
534// allFiles returns all files that might be referenced by the package.
535// It may contain false positives.
536func allFiles(p *packages.Package) map[string]bool {
537 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000538 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700539 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
540 for _, extra := range add {
541 for _, file := range extra {
542 files[file] = true
543 }
544 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000545 // Add files from testdata directory
546 testdataDir := filepath.Join(p.Dir, "testdata")
547 if _, err := os.Stat(testdataDir); err == nil {
548 fsys := os.DirFS(p.Dir)
549 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
550 if err == nil && !d.IsDir() {
551 files[filepath.Join(p.Dir, path)] = true
552 }
553 return nil
554 })
555 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700556 return files
557}
558
559// addDependentPackages adds to pkgs all packages from universe
560// that directly or indirectly depend on any package already in pkgs.
561func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
562 for {
563 changed := false
564 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000565 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
566 // skip test packages
567 continue
568 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700569 if _, ok := pkgs[p.PkgPath]; ok {
570 // already in pkgs
571 continue
572 }
573 for importPath := range p.Imports {
574 if _, ok := pkgs[importPath]; ok {
575 // imports a package dependent on pkgs, add it
576 pkgs[p.PkgPath] = p
577 changed = true
578 break
579 }
580 }
581 }
582 if !changed {
583 break
584 }
585 }
586}
587
588// testJSON is a union of BuildEvent and TestEvent
589type testJSON struct {
590 // TestEvent only:
591 // The Time field holds the time the event happened. It is conventionally omitted
592 // for cached test results.
593 Time time.Time `json:"Time"`
594 // BuildEvent only:
595 // The ImportPath field gives the package ID of the package being built.
596 // This matches the Package.ImportPath field of go list -json and the
597 // TestEvent.FailedBuild field of go test -json. Note that it does not
598 // match TestEvent.Package.
599 ImportPath string `json:"ImportPath"` // BuildEvent only
600 // TestEvent only:
601 // The Package field, if present, specifies the package being tested. When the
602 // go command runs parallel tests in -json mode, events from different tests are
603 // interlaced; the Package field allows readers to separate them.
604 Package string `json:"Package"`
605 // Action is used in both BuildEvent and TestEvent.
606 // It is the key to distinguishing between them.
607 // BuildEvent:
608 // build-output or build-fail
609 // TestEvent:
610 // start, run, pause, cont, pass, bench, fail, output, skip
611 Action string `json:"Action"`
612 // TestEvent only:
613 // The Test field, if present, specifies the test, example, or benchmark function
614 // that caused the event. Events for the overall package test do not set Test.
615 Test string `json:"Test"`
616 // TestEvent only:
617 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
618 // for the specific test or the overall package test that passed or failed.
619 Elapsed float64
620 // TestEvent:
621 // The Output field is set for Action == "output" and is a portion of the
622 // test's output (standard output and standard error merged together). The
623 // output is unmodified except that invalid UTF-8 output from a test is coerced
624 // into valid UTF-8 by use of replacement characters. With that one exception,
625 // the concatenation of the Output fields of all output events is the exact output
626 // of the test execution.
627 // BuildEvent:
628 // The Output field is set for Action == "build-output" and is a portion of
629 // the build's output. The concatenation of the Output fields of all output
630 // events is the exact output of the build. A single event may contain one
631 // or more lines of output and there may be more than one output event for
632 // a given ImportPath. This matches the definition of the TestEvent.Output
633 // field produced by go test -json.
634 Output string `json:"Output"`
635 // TestEvent only:
636 // The FailedBuild field is set for Action == "fail" if the test failure was caused
637 // by a build failure. It contains the package ID of the package that failed to
638 // build. This matches the ImportPath field of the "go list" output, as well as the
639 // BuildEvent.ImportPath field as emitted by "go build -json".
640 FailedBuild string `json:"FailedBuild"`
641}
642
643// parseTestResults converts test output in JSONL format into a slice of testJSON objects
644func parseTestResults(testOutput []byte) ([]testJSON, error) {
645 var results []testJSON
646 dec := json.NewDecoder(bytes.NewReader(testOutput))
647 for {
648 var event testJSON
649 if err := dec.Decode(&event); err != nil {
650 if err == io.EOF {
651 break
652 }
653 return nil, err
654 }
655 results = append(results, event)
656 }
657 return results, nil
658}
659
660// testStatus represents the status of a test in a given commit
661type testStatus int
662
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000663//go:generate go tool stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700664const (
665 testStatusUnknown testStatus = iota
666 testStatusPass
667 testStatusFail
668 testStatusBuildFail
669 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000670 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700671)
672
Earl Lee2e463fb2025-04-17 11:22:22 -0700673// testRegression represents a test that regressed between commits
674type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000675 Package string
676 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700677 BeforeStatus testStatus
678 AfterStatus testStatus
679 Output string // failure output in the after state
680}
681
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000682func (r *testRegression) Source() string {
683 if r.Test == "" {
684 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700685 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000686 return fmt.Sprintf("%s.%s", r.Package, r.Test)
687}
Earl Lee2e463fb2025-04-17 11:22:22 -0700688
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000689type packageResult struct {
690 Status testStatus // overall status for the package
691 TestStatus map[string]testStatus // name -> status
692 TestOutput map[string][]string // name -> output parts
693}
Earl Lee2e463fb2025-04-17 11:22:22 -0700694
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000695// collectTestStatuses processes a slice of test events and returns rich status information
696func collectTestStatuses(results []testJSON) map[string]*packageResult {
697 m := make(map[string]*packageResult)
698
699 for _, event := range results {
700 pkg := event.Package
701 p, ok := m[pkg]
702 if !ok {
703 p = new(packageResult)
704 p.TestStatus = make(map[string]testStatus)
705 p.TestOutput = make(map[string][]string)
706 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700707 }
708
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000709 switch event.Action {
710 case "output":
711 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700712 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700713 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000714 if event.Test == "" {
715 p.Status = testStatusPass
716 } else {
717 p.TestStatus[event.Test] = testStatusPass
718 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700719 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000720 if event.Test == "" {
721 if event.FailedBuild != "" {
722 p.Status = testStatusBuildFail
723 } else {
724 p.Status = testStatusFail
725 }
726 } else {
727 p.TestStatus[event.Test] = testStatusFail
728 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700729 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000730 if event.Test == "" {
731 p.Status = testStatusNoTests
732 } else {
733 p.TestStatus[event.Test] = testStatusSkip
734 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700735 }
736 }
737
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000738 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700739}
740
741// compareTestResults identifies tests that have regressed between commits
742func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000743 before := collectTestStatuses(beforeResults)
744 after := collectTestStatuses(afterResults)
745 var testLevelRegressions []testRegression
746 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700747
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000748 afterPkgs := slices.Sorted(maps.Keys(after))
749 for _, pkg := range afterPkgs {
750 afterResult := after[pkg]
751 afterStatus := afterResult.Status
752 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
753 beforeResult, ok := before[pkg]
754 beforeStatus := testStatusNoTests
755 if ok {
756 beforeStatus = beforeResult.Status
757 }
758 // If things no longer build, stop at the package level.
759 // Otherwise, proceed to the test level, so we have more precise information.
760 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
761 packageLevelRegressions = append(packageLevelRegressions, testRegression{
762 Package: pkg,
763 BeforeStatus: beforeStatus,
764 AfterStatus: afterStatus,
765 })
766 continue
767 }
768 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
769 for _, test := range tests {
770 afterStatus := afterResult.TestStatus[test]
771 switch afterStatus {
772 case testStatusPass:
773 continue
774 case testStatusUnknown:
775 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
776 continue
777 }
778 beforeStatus := beforeResult.TestStatus[test]
779 if isRegression(beforeStatus, afterStatus) {
780 testLevelRegressions = append(testLevelRegressions, testRegression{
781 Package: pkg,
782 Test: test,
783 BeforeStatus: beforeStatus,
784 AfterStatus: afterStatus,
785 Output: strings.Join(afterResult.TestOutput[test], ""),
786 })
787 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700788 }
789 }
790
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000791 // If we have test-level regressions, report only those
792 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700793 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000794 if len(testLevelRegressions) > 0 {
795 regressions = testLevelRegressions
796 } else {
797 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700798 }
799
800 // Sort regressions for consistent output
801 slices.SortFunc(regressions, func(a, b testRegression) int {
802 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000803 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700804 return c
805 }
806 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000807 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700808 })
809
810 return regressions, nil
811}
812
813// badnessLevels maps test status to a badness level
814// Higher values indicate worse status (more severe issues)
815var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000816 testStatusBuildFail: 5, // Worst
817 testStatusFail: 4,
818 testStatusSkip: 3,
819 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700820 testStatusPass: 1,
821 testStatusUnknown: 0, // Least bad - avoids false positives
822}
823
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000824// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
825type regressionKey struct {
826 before, after testStatus
827}
828
829var regressionMessages = map[regressionKey]string{
830 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
831 {testStatusUnknown, testStatusFail}: "New test is failing",
832 {testStatusUnknown, testStatusSkip}: "New test is skipped",
833 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
834 {testStatusPass, testStatusFail}: "Was passing, now failing",
835 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
836 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
837 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
838 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
839 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
840 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
841 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
842}
843
Earl Lee2e463fb2025-04-17 11:22:22 -0700844// isRegression determines if a test has regressed based on before and after status
845// A regression is defined as an increase in badness level
846func isRegression(before, after testStatus) bool {
847 // Higher badness level means worse status
848 return badnessLevels[after] > badnessLevels[before]
849}
850
851// formatTestRegressions generates a human-readable summary of test regressions
852func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
853 if len(regressions) == 0 {
854 return ""
855 }
856
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000857 buf := new(strings.Builder)
858 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -0700859
860 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000861 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
862 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
863 message, exists := regressionMessages[key]
864 if !exists {
865 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700866 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000867 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700868 }
869
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000870 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700871}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000872
873// RelatedFile represents a file historically related to the changed files
874type RelatedFile struct {
875 Path string // Path to the file
876 Correlation float64 // Correlation score (0.0-1.0)
877}
878
879// findRelatedFiles identifies files that are historically related to the changed files
880// by analyzing git commit history for co-occurrences.
881func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
882 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
883 if err != nil {
884 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
885 }
886 if len(commits) == 0 {
887 return nil, nil
888 }
889
890 relChanged := make(map[string]bool, len(changedFiles))
891 for _, file := range changedFiles {
892 rel, err := filepath.Rel(r.repoRoot, file)
893 if err != nil {
894 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
895 }
896 relChanged[rel] = true
897 }
898
899 historyFiles := make(map[string]int)
900 var historyMu sync.Mutex
901
902 maxWorkers := runtime.GOMAXPROCS(0)
903 semaphore := make(chan bool, maxWorkers)
904 var wg sync.WaitGroup
905
906 for _, commit := range commits {
907 wg.Add(1)
908 semaphore <- true // acquire
909
910 go func(commit string) {
911 defer wg.Done()
912 defer func() { <-semaphore }() // release
913 commitFiles, err := r.getFilesInCommit(ctx, commit)
914 if err != nil {
915 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
916 return
917 }
918 incr := 0
919 for _, file := range commitFiles {
920 if relChanged[file] {
921 incr++
922 }
923 }
924 if incr == 0 {
925 return
926 }
927 historyMu.Lock()
928 defer historyMu.Unlock()
929 for _, file := range commitFiles {
930 historyFiles[file] += incr
931 }
932 }(commit)
933 }
934 wg.Wait()
935
936 // normalize
937 maxCount := 0
938 for _, count := range historyFiles {
939 maxCount = max(maxCount, count)
940 }
941 if maxCount == 0 {
942 return nil, nil
943 }
944
945 var relatedFiles []RelatedFile
946 for file, count := range historyFiles {
947 if relChanged[file] {
948 // Don't include inputs in the output.
949 continue
950 }
951 correlation := float64(count) / float64(maxCount)
952 // Require min correlation to avoid noise
953 if correlation >= 0.1 {
954 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
955 }
956 }
957
958 // Highest correlation first
959 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
960 return -1 * cmp.Compare(a.Correlation, b.Correlation)
961 })
962
963 // Limit to 1 correlated file per input file.
964 // (Arbitrary limit, to be adjusted.)
965 maxFiles := len(changedFiles)
966 if len(relatedFiles) > maxFiles {
967 relatedFiles = relatedFiles[:maxFiles]
968 }
969
970 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
971 // and then increase the strength of the wording in the relatedFiles message.
972
973 return relatedFiles, nil
974}
975
976// getCommitsTouchingFiles returns all commits that touch any of the specified files
977func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
978 if len(files) == 0 {
979 return nil, nil
980 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +0000981 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000982 cmd := exec.CommandContext(ctx, "git", fileArgs...)
983 cmd.Dir = r.repoRoot
984 out, err := cmd.CombinedOutput()
985 if err != nil {
986 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
987 }
988 return nonEmptyTrimmedLines(out), nil
989}
990
991// getFilesInCommit returns all files changed in a specific commit
992func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
993 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
994 cmd.Dir = r.repoRoot
995 out, err := cmd.CombinedOutput()
996 if err != nil {
997 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
998 }
999 return nonEmptyTrimmedLines(out), nil
1000}
1001
1002func nonEmptyTrimmedLines(b []byte) []string {
1003 var lines []string
1004 for line := range strings.Lines(string(b)) {
1005 line = strings.TrimSpace(line)
1006 if line != "" {
1007 lines = append(lines, line)
1008 }
1009 }
1010 return lines
1011}
1012
1013// formatRelatedFiles formats the related files list into a human-readable message
1014func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1015 if len(files) == 0 {
1016 return ""
1017 }
1018
1019 buf := new(strings.Builder)
1020
1021 fmt.Fprintf(buf, "Potentially related files:\n\n")
1022
1023 for _, file := range files {
1024 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1025 }
1026
1027 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1028 return buf.String()
1029}