blob: a6b3413261b2ce82733ab1cff168a8b986313c2b [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
23 "sketch.dev/ant"
24)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
29func (r *CodeReviewer) Tool() *ant.Tool {
30 spec := &ant.Tool{
31 Name: "codereview",
32 Description: `Run an automated code review.`,
33 // If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder2b2090e2025-04-29 16:41:34 -070034 InputSchema: ant.MustSchema(`{"type": "object", "properties": {}}`),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 Run: r.Run,
36 }
37 return spec
38}
39
40func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000041 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
42 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee2e463fb2025-04-17 11:22:22 -070043 if err := r.RequireNormalGitState(ctx); err != nil {
44 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
45 return "", err
46 }
47 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
48 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
49 return "", err
50 }
51
52 // Check that the current commit is not the initial commit
53 currentCommit, err := r.CurrentCommit(ctx)
54 if err != nil {
55 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
56 return "", err
57 }
58 if r.IsInitialCommit(currentCommit) {
59 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
60 return "", fmt.Errorf("no new commits have been added, nothing to review")
61 }
62
63 // No matter what failures happen from here out, we will declare this to have been reviewed.
64 // This should help avoid the model getting blocked by a broken code review tool.
65 r.reviewed = append(r.reviewed, currentCommit)
66
67 changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
68 if err != nil {
69 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
70 return "", err
71 }
72
73 // Prepare to analyze before/after for the impacted files.
74 // We use the current commit to determine what packages exist and are impacted.
75 // The packages in the initial commit may be different.
76 // Good enough for now.
77 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000078 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070079 if err != nil {
80 // TODO: log and skip to stuff that doesn't require packages
81 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
82 return "", err
83 }
84 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070085
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000086 var errorMessages []string // problems we want the model to address
87 var infoMessages []string // info the model should consider
88
89 // Find potentially related files that should also be considered
90 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
91 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
92 if err != nil {
93 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
94 } else {
95 relatedMsg := r.formatRelatedFiles(relatedFiles)
96 if relatedMsg != "" {
97 infoMessages = append(infoMessages, relatedMsg)
98 }
99 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700100
101 testMsg, err := r.checkTests(ctx, allPkgList)
102 if err != nil {
103 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
104 return "", err
105 }
106 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000107 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700108 }
109
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000110 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700111 if err != nil {
112 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
113 return "", err
114 }
115 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000116 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 }
118
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000119 // NOTE: If you change this output format, update the corresponding UI parsing in:
120 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000121 buf := new(strings.Builder)
122 if len(infoMessages) > 0 {
123 buf.WriteString("# Info\n\n")
124 buf.WriteString(strings.Join(infoMessages, "\n\n"))
125 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700126 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000127 if len(errorMessages) > 0 {
128 buf.WriteString("# Errors\n\n")
129 buf.WriteString(strings.Join(errorMessages, "\n\n"))
130 buf.WriteString("\n\nPlease fix before proceeding.\n")
131 }
132 if buf.Len() == 0 {
133 buf.WriteString("OK")
134 }
135 return buf.String(), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700136}
137
138func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
139 if r.initialWorktree != "" {
140 return nil
141 }
142 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
143 if err != nil {
144 return err
145 }
146 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
147 worktreeCmd.Dir = r.repoRoot
148 out, err := worktreeCmd.CombinedOutput()
149 if err != nil {
150 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
151 }
152 r.initialWorktree = tmpDir
153 return nil
154}
155
156func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
157 goTestArgs := []string{"test", "-json", "-v"}
158 goTestArgs = append(goTestArgs, pkgList...)
159
160 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
161 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000162 afterTestOut, _ := afterTestCmd.Output()
163 // unfortunately, we can't short-circuit here even if all tests pass,
164 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700165
166 err := r.initializeInitialCommitWorktree(ctx)
167 if err != nil {
168 return "", err
169 }
170
171 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
172 beforeTestCmd.Dir = r.initialWorktree
173 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
174
175 // Parse the jsonl test results
176 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
177 if beforeParseErr != nil {
178 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
179 }
180 afterResults, afterParseErr := parseTestResults(afterTestOut)
181 if afterParseErr != nil {
182 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
183 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700184 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
185 if err != nil {
186 return "", fmt.Errorf("failed to compare test results: %w", err)
187 }
188 // TODO: better output formatting?
189 res := r.formatTestRegressions(testRegressions)
190 return res, nil
191}
192
Earl Lee2e463fb2025-04-17 11:22:22 -0700193// GoplsIssue represents a single issue reported by gopls check
194type GoplsIssue struct {
195 Position string // File position in format "file:line:col-range"
196 Message string // Description of the issue
197}
198
199// checkGopls runs gopls check on the provided files in both the current and initial state,
200// compares the results, and reports any new issues introduced in the current state.
201func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
202 if len(changedFiles) == 0 {
203 return "", nil // no files to check
204 }
205
206 // Filter out non-Go files as gopls only works on Go files
207 // and verify they still exist (not deleted)
208 var goFiles []string
209 for _, file := range changedFiles {
210 if !strings.HasSuffix(file, ".go") {
211 continue // not a Go file
212 }
213
214 // Check if the file still exists (not deleted)
215 if _, err := os.Stat(file); os.IsNotExist(err) {
216 continue // file doesn't exist anymore (deleted)
217 }
218
219 goFiles = append(goFiles, file)
220 }
221
222 if len(goFiles) == 0 {
223 return "", nil // no Go files to check
224 }
225
226 // Run gopls check on the current state
227 goplsArgs := append([]string{"check"}, goFiles...)
228
229 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
230 afterGoplsCmd.Dir = r.repoRoot
231 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
232 if err != nil {
233 // Check if the output looks like real gopls issues or if it's just error output
234 if !looksLikeGoplsIssues(afterGoplsOut) {
235 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
236 return "", nil // Skip rather than failing the entire code review
237 }
238 // Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
239 }
240
241 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000242 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700243
244 // If no issues were found, we're done
245 if len(afterIssues) == 0 {
246 return "", nil
247 }
248
249 // Gopls detected issues in the current state, check if they existed in the initial state
250 initErr := r.initializeInitialCommitWorktree(ctx)
251 if initErr != nil {
252 return "", err
253 }
254
255 // For each file that exists in the initial commit, run gopls check
256 var initialFilesToCheck []string
257 for _, file := range goFiles {
258 // Get relative path for git operations
259 relFile, err := filepath.Rel(r.repoRoot, file)
260 if err != nil {
261 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
262 continue
263 }
264
265 // Check if the file exists in the initial commit
266 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
267 checkCmd.Dir = r.repoRoot
268 if err := checkCmd.Run(); err == nil {
269 // File exists in initial commit
270 initialFilePath := filepath.Join(r.initialWorktree, relFile)
271 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
272 }
273 }
274
275 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000276 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700277 if len(initialFilesToCheck) > 0 {
278 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
279 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
280 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000281 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700282 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
283 // If gopls fails to run properly on the initial commit, log a warning and continue
284 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000285 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700286 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000287 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700288 }
289 }
290
291 // Find new issues that weren't present in the initial state
292 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
293 if len(goplsRegressions) == 0 {
294 return "", nil // no new issues
295 }
296
297 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000298 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700299}
300
301// parseGoplsOutput parses the text output from gopls check
302// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000303func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700304 var issues []GoplsIssue
305 lines := strings.Split(string(output), "\n")
306
307 for _, line := range lines {
308 line = strings.TrimSpace(line)
309 if line == "" {
310 continue
311 }
312
313 // Skip lines that look like error messages rather than gopls issues
314 if strings.HasPrefix(line, "Error:") ||
315 strings.HasPrefix(line, "Failed:") ||
316 strings.HasPrefix(line, "Warning:") ||
317 strings.HasPrefix(line, "gopls:") {
318 continue
319 }
320
321 // Find the first colon that separates the file path from the line number
322 firstColonIdx := strings.Index(line, ":")
323 if firstColonIdx < 0 {
324 continue // Invalid format
325 }
326
327 // Verify the part before the first colon looks like a file path
328 potentialPath := line[:firstColonIdx]
329 if !strings.HasSuffix(potentialPath, ".go") {
330 continue // Not a Go file path
331 }
332
333 // Find the position of the first message separator ': '
334 // This separates the position info from the message
335 messageStart := strings.Index(line, ": ")
336 if messageStart < 0 || messageStart <= firstColonIdx {
337 continue // Invalid format
338 }
339
340 // Extract position and message
341 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000342 rel, err := filepath.Rel(root, position)
343 if err == nil {
344 position = rel
345 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700346 message := line[messageStart+2:] // Skip the ': ' separator
347
348 // Verify position has the expected format (at least 2 colons for line:col)
349 colonCount := strings.Count(position, ":")
350 if colonCount < 2 {
351 continue // Not enough position information
352 }
353
354 issues = append(issues, GoplsIssue{
355 Position: position,
356 Message: message,
357 })
358 }
359
360 return issues
361}
362
363// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
364// rather than error messages about gopls itself failing
365func looksLikeGoplsIssues(output []byte) bool {
366 // If output is empty, it's not valid issues
367 if len(output) == 0 {
368 return false
369 }
370
371 // Check if output has at least one line that looks like a gopls issue
372 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
373 lines := strings.Split(string(output), "\n")
374 for _, line := range lines {
375 line = strings.TrimSpace(line)
376 if line == "" {
377 continue
378 }
379
380 // A gopls issue has at least two colons (file path, line number, column)
381 // and contains a colon followed by a space (separating position from message)
382 colonCount := strings.Count(line, ":")
383 hasSeparator := strings.Contains(line, ": ")
384
385 if colonCount >= 2 && hasSeparator {
386 // Check if it starts with a likely file path (ending in .go)
387 parts := strings.SplitN(line, ":", 2)
388 if strings.HasSuffix(parts[0], ".go") {
389 return true
390 }
391 }
392 }
393 return false
394}
395
396// normalizeGoplsPosition extracts just the file path from a position string
397func normalizeGoplsPosition(position string) string {
398 // Extract just the file path by taking everything before the first colon
399 parts := strings.Split(position, ":")
400 if len(parts) < 1 {
401 return position
402 }
403 return parts[0]
404}
405
406// findGoplsRegressions identifies gopls issues that are new in the after state
407func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
408 var regressions []GoplsIssue
409
410 // Build map of before issues for easier lookup
411 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
412 for _, issue := range before {
413 file := normalizeGoplsPosition(issue.Position)
414 if _, exists := beforeIssueMap[file]; !exists {
415 beforeIssueMap[file] = make(map[string]bool)
416 }
417 // Store both the exact message and the general issue type for fuzzy matching
418 beforeIssueMap[file][issue.Message] = true
419
420 // Extract the general issue type (everything before the first ':' in the message)
421 generalIssue := issue.Message
422 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
423 generalIssue = issue.Message[:colonIdx]
424 }
425 beforeIssueMap[file][generalIssue] = true
426 }
427
428 // Check each after issue to see if it's new
429 for _, afterIssue := range after {
430 file := normalizeGoplsPosition(afterIssue.Position)
431 isNew := true
432
433 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
434 // Check for exact message match
435 if fileIssues[afterIssue.Message] {
436 isNew = false
437 } else {
438 // Check for general issue type match
439 generalIssue := afterIssue.Message
440 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
441 generalIssue = afterIssue.Message[:colonIdx]
442 }
443 if fileIssues[generalIssue] {
444 isNew = false
445 }
446 }
447 }
448
449 if isNew {
450 regressions = append(regressions, afterIssue)
451 }
452 }
453
454 // Sort regressions for deterministic output
455 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
456 return strings.Compare(a.Position, b.Position)
457 })
458
459 return regressions
460}
461
462// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000463func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700464 if len(regressions) == 0 {
465 return ""
466 }
467
468 var sb strings.Builder
469 sb.WriteString("Gopls check issues detected:\n\n")
470
471 // Format each issue
472 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000473 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700474 }
475
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000476 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
477 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700478 return sb.String()
479}
480
481func (r *CodeReviewer) HasReviewed(commit string) bool {
482 return slices.Contains(r.reviewed, commit)
483}
484
485func (r *CodeReviewer) IsInitialCommit(commit string) bool {
486 return commit == r.initialCommit
487}
488
489// packagesForFiles returns maps of packages related to the given files:
490// 1. directPkgs: packages that directly contain the changed files
491// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
492// It may include false positives.
493// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000494func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700495 for _, f := range files {
496 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000497 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700498 }
499 }
500 cfg := &packages.Config{
501 Mode: packages.LoadImports | packages.NeedEmbedFiles,
502 Context: ctx,
503 // Logf: func(msg string, args ...any) {
504 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
505 // },
506 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
507 // We can cross that bridge when we get there.
508 Dir: r.repoRoot,
509 Tests: true,
510 }
511 universe, err := packages.Load(cfg, "./...")
512 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000513 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700514 }
515 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000516 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700517 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700518 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700519 for _, file := range files {
520 if pkgFiles[file] {
521 // prefer test packages, as they contain strictly more files (right?)
522 prev := directPkgs[pkg.PkgPath]
523 if prev == nil || prev.ForTest == "" {
524 directPkgs[pkg.PkgPath] = pkg
525 }
526 }
527 }
528 }
529
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000530 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700531
532 // Add packages that depend on the direct packages
533 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000534 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700535}
536
537// allFiles returns all files that might be referenced by the package.
538// It may contain false positives.
539func allFiles(p *packages.Package) map[string]bool {
540 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000541 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700542 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
543 for _, extra := range add {
544 for _, file := range extra {
545 files[file] = true
546 }
547 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000548 // Add files from testdata directory
549 testdataDir := filepath.Join(p.Dir, "testdata")
550 if _, err := os.Stat(testdataDir); err == nil {
551 fsys := os.DirFS(p.Dir)
552 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
553 if err == nil && !d.IsDir() {
554 files[filepath.Join(p.Dir, path)] = true
555 }
556 return nil
557 })
558 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700559 return files
560}
561
562// addDependentPackages adds to pkgs all packages from universe
563// that directly or indirectly depend on any package already in pkgs.
564func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
565 for {
566 changed := false
567 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000568 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
569 // skip test packages
570 continue
571 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700572 if _, ok := pkgs[p.PkgPath]; ok {
573 // already in pkgs
574 continue
575 }
576 for importPath := range p.Imports {
577 if _, ok := pkgs[importPath]; ok {
578 // imports a package dependent on pkgs, add it
579 pkgs[p.PkgPath] = p
580 changed = true
581 break
582 }
583 }
584 }
585 if !changed {
586 break
587 }
588 }
589}
590
591// testJSON is a union of BuildEvent and TestEvent
592type testJSON struct {
593 // TestEvent only:
594 // The Time field holds the time the event happened. It is conventionally omitted
595 // for cached test results.
596 Time time.Time `json:"Time"`
597 // BuildEvent only:
598 // The ImportPath field gives the package ID of the package being built.
599 // This matches the Package.ImportPath field of go list -json and the
600 // TestEvent.FailedBuild field of go test -json. Note that it does not
601 // match TestEvent.Package.
602 ImportPath string `json:"ImportPath"` // BuildEvent only
603 // TestEvent only:
604 // The Package field, if present, specifies the package being tested. When the
605 // go command runs parallel tests in -json mode, events from different tests are
606 // interlaced; the Package field allows readers to separate them.
607 Package string `json:"Package"`
608 // Action is used in both BuildEvent and TestEvent.
609 // It is the key to distinguishing between them.
610 // BuildEvent:
611 // build-output or build-fail
612 // TestEvent:
613 // start, run, pause, cont, pass, bench, fail, output, skip
614 Action string `json:"Action"`
615 // TestEvent only:
616 // The Test field, if present, specifies the test, example, or benchmark function
617 // that caused the event. Events for the overall package test do not set Test.
618 Test string `json:"Test"`
619 // TestEvent only:
620 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
621 // for the specific test or the overall package test that passed or failed.
622 Elapsed float64
623 // TestEvent:
624 // The Output field is set for Action == "output" and is a portion of the
625 // test's output (standard output and standard error merged together). The
626 // output is unmodified except that invalid UTF-8 output from a test is coerced
627 // into valid UTF-8 by use of replacement characters. With that one exception,
628 // the concatenation of the Output fields of all output events is the exact output
629 // of the test execution.
630 // BuildEvent:
631 // The Output field is set for Action == "build-output" and is a portion of
632 // the build's output. The concatenation of the Output fields of all output
633 // events is the exact output of the build. A single event may contain one
634 // or more lines of output and there may be more than one output event for
635 // a given ImportPath. This matches the definition of the TestEvent.Output
636 // field produced by go test -json.
637 Output string `json:"Output"`
638 // TestEvent only:
639 // The FailedBuild field is set for Action == "fail" if the test failure was caused
640 // by a build failure. It contains the package ID of the package that failed to
641 // build. This matches the ImportPath field of the "go list" output, as well as the
642 // BuildEvent.ImportPath field as emitted by "go build -json".
643 FailedBuild string `json:"FailedBuild"`
644}
645
646// parseTestResults converts test output in JSONL format into a slice of testJSON objects
647func parseTestResults(testOutput []byte) ([]testJSON, error) {
648 var results []testJSON
649 dec := json.NewDecoder(bytes.NewReader(testOutput))
650 for {
651 var event testJSON
652 if err := dec.Decode(&event); err != nil {
653 if err == io.EOF {
654 break
655 }
656 return nil, err
657 }
658 results = append(results, event)
659 }
660 return results, nil
661}
662
663// testStatus represents the status of a test in a given commit
664type testStatus int
665
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000666//go:generate go tool stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700667const (
668 testStatusUnknown testStatus = iota
669 testStatusPass
670 testStatusFail
671 testStatusBuildFail
672 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000673 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700674)
675
Earl Lee2e463fb2025-04-17 11:22:22 -0700676// testRegression represents a test that regressed between commits
677type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000678 Package string
679 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700680 BeforeStatus testStatus
681 AfterStatus testStatus
682 Output string // failure output in the after state
683}
684
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000685func (r *testRegression) Source() string {
686 if r.Test == "" {
687 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700688 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000689 return fmt.Sprintf("%s.%s", r.Package, r.Test)
690}
Earl Lee2e463fb2025-04-17 11:22:22 -0700691
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000692type packageResult struct {
693 Status testStatus // overall status for the package
694 TestStatus map[string]testStatus // name -> status
695 TestOutput map[string][]string // name -> output parts
696}
Earl Lee2e463fb2025-04-17 11:22:22 -0700697
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000698// collectTestStatuses processes a slice of test events and returns rich status information
699func collectTestStatuses(results []testJSON) map[string]*packageResult {
700 m := make(map[string]*packageResult)
701
702 for _, event := range results {
703 pkg := event.Package
704 p, ok := m[pkg]
705 if !ok {
706 p = new(packageResult)
707 p.TestStatus = make(map[string]testStatus)
708 p.TestOutput = make(map[string][]string)
709 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700710 }
711
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000712 switch event.Action {
713 case "output":
714 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700715 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700716 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000717 if event.Test == "" {
718 p.Status = testStatusPass
719 } else {
720 p.TestStatus[event.Test] = testStatusPass
721 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700722 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000723 if event.Test == "" {
724 if event.FailedBuild != "" {
725 p.Status = testStatusBuildFail
726 } else {
727 p.Status = testStatusFail
728 }
729 } else {
730 p.TestStatus[event.Test] = testStatusFail
731 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700732 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000733 if event.Test == "" {
734 p.Status = testStatusNoTests
735 } else {
736 p.TestStatus[event.Test] = testStatusSkip
737 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700738 }
739 }
740
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000741 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700742}
743
744// compareTestResults identifies tests that have regressed between commits
745func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000746 before := collectTestStatuses(beforeResults)
747 after := collectTestStatuses(afterResults)
748 var testLevelRegressions []testRegression
749 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700750
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000751 afterPkgs := slices.Sorted(maps.Keys(after))
752 for _, pkg := range afterPkgs {
753 afterResult := after[pkg]
754 afterStatus := afterResult.Status
755 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
756 beforeResult, ok := before[pkg]
757 beforeStatus := testStatusNoTests
758 if ok {
759 beforeStatus = beforeResult.Status
760 }
761 // If things no longer build, stop at the package level.
762 // Otherwise, proceed to the test level, so we have more precise information.
763 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
764 packageLevelRegressions = append(packageLevelRegressions, testRegression{
765 Package: pkg,
766 BeforeStatus: beforeStatus,
767 AfterStatus: afterStatus,
768 })
769 continue
770 }
771 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
772 for _, test := range tests {
773 afterStatus := afterResult.TestStatus[test]
774 switch afterStatus {
775 case testStatusPass:
776 continue
777 case testStatusUnknown:
778 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
779 continue
780 }
781 beforeStatus := beforeResult.TestStatus[test]
782 if isRegression(beforeStatus, afterStatus) {
783 testLevelRegressions = append(testLevelRegressions, testRegression{
784 Package: pkg,
785 Test: test,
786 BeforeStatus: beforeStatus,
787 AfterStatus: afterStatus,
788 Output: strings.Join(afterResult.TestOutput[test], ""),
789 })
790 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700791 }
792 }
793
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000794 // If we have test-level regressions, report only those
795 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700796 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000797 if len(testLevelRegressions) > 0 {
798 regressions = testLevelRegressions
799 } else {
800 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700801 }
802
803 // Sort regressions for consistent output
804 slices.SortFunc(regressions, func(a, b testRegression) int {
805 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000806 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700807 return c
808 }
809 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000810 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700811 })
812
813 return regressions, nil
814}
815
816// badnessLevels maps test status to a badness level
817// Higher values indicate worse status (more severe issues)
818var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000819 testStatusBuildFail: 5, // Worst
820 testStatusFail: 4,
821 testStatusSkip: 3,
822 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700823 testStatusPass: 1,
824 testStatusUnknown: 0, // Least bad - avoids false positives
825}
826
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000827// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
828type regressionKey struct {
829 before, after testStatus
830}
831
832var regressionMessages = map[regressionKey]string{
833 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
834 {testStatusUnknown, testStatusFail}: "New test is failing",
835 {testStatusUnknown, testStatusSkip}: "New test is skipped",
836 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
837 {testStatusPass, testStatusFail}: "Was passing, now failing",
838 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
839 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
840 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
841 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
842 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
843 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
844 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
845}
846
Earl Lee2e463fb2025-04-17 11:22:22 -0700847// isRegression determines if a test has regressed based on before and after status
848// A regression is defined as an increase in badness level
849func isRegression(before, after testStatus) bool {
850 // Higher badness level means worse status
851 return badnessLevels[after] > badnessLevels[before]
852}
853
854// formatTestRegressions generates a human-readable summary of test regressions
855func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
856 if len(regressions) == 0 {
857 return ""
858 }
859
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000860 buf := new(strings.Builder)
861 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -0700862
863 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000864 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
865 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
866 message, exists := regressionMessages[key]
867 if !exists {
868 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700869 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000870 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700871 }
872
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000873 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700874}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000875
876// RelatedFile represents a file historically related to the changed files
877type RelatedFile struct {
878 Path string // Path to the file
879 Correlation float64 // Correlation score (0.0-1.0)
880}
881
882// findRelatedFiles identifies files that are historically related to the changed files
883// by analyzing git commit history for co-occurrences.
884func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
885 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
886 if err != nil {
887 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
888 }
889 if len(commits) == 0 {
890 return nil, nil
891 }
892
893 relChanged := make(map[string]bool, len(changedFiles))
894 for _, file := range changedFiles {
895 rel, err := filepath.Rel(r.repoRoot, file)
896 if err != nil {
897 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
898 }
899 relChanged[rel] = true
900 }
901
902 historyFiles := make(map[string]int)
903 var historyMu sync.Mutex
904
905 maxWorkers := runtime.GOMAXPROCS(0)
906 semaphore := make(chan bool, maxWorkers)
907 var wg sync.WaitGroup
908
909 for _, commit := range commits {
910 wg.Add(1)
911 semaphore <- true // acquire
912
913 go func(commit string) {
914 defer wg.Done()
915 defer func() { <-semaphore }() // release
916 commitFiles, err := r.getFilesInCommit(ctx, commit)
917 if err != nil {
918 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
919 return
920 }
921 incr := 0
922 for _, file := range commitFiles {
923 if relChanged[file] {
924 incr++
925 }
926 }
927 if incr == 0 {
928 return
929 }
930 historyMu.Lock()
931 defer historyMu.Unlock()
932 for _, file := range commitFiles {
933 historyFiles[file] += incr
934 }
935 }(commit)
936 }
937 wg.Wait()
938
939 // normalize
940 maxCount := 0
941 for _, count := range historyFiles {
942 maxCount = max(maxCount, count)
943 }
944 if maxCount == 0 {
945 return nil, nil
946 }
947
948 var relatedFiles []RelatedFile
949 for file, count := range historyFiles {
950 if relChanged[file] {
951 // Don't include inputs in the output.
952 continue
953 }
954 correlation := float64(count) / float64(maxCount)
955 // Require min correlation to avoid noise
956 if correlation >= 0.1 {
957 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
958 }
959 }
960
961 // Highest correlation first
962 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
963 return -1 * cmp.Compare(a.Correlation, b.Correlation)
964 })
965
966 // Limit to 1 correlated file per input file.
967 // (Arbitrary limit, to be adjusted.)
968 maxFiles := len(changedFiles)
969 if len(relatedFiles) > maxFiles {
970 relatedFiles = relatedFiles[:maxFiles]
971 }
972
973 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
974 // and then increase the strength of the wording in the relatedFiles message.
975
976 return relatedFiles, nil
977}
978
979// getCommitsTouchingFiles returns all commits that touch any of the specified files
980func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
981 if len(files) == 0 {
982 return nil, nil
983 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +0000984 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000985 cmd := exec.CommandContext(ctx, "git", fileArgs...)
986 cmd.Dir = r.repoRoot
987 out, err := cmd.CombinedOutput()
988 if err != nil {
989 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
990 }
991 return nonEmptyTrimmedLines(out), nil
992}
993
994// getFilesInCommit returns all files changed in a specific commit
995func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
996 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
997 cmd.Dir = r.repoRoot
998 out, err := cmd.CombinedOutput()
999 if err != nil {
1000 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1001 }
1002 return nonEmptyTrimmedLines(out), nil
1003}
1004
1005func nonEmptyTrimmedLines(b []byte) []string {
1006 var lines []string
1007 for line := range strings.Lines(string(b)) {
1008 line = strings.TrimSpace(line)
1009 if line != "" {
1010 lines = append(lines, line)
1011 }
1012 }
1013 return lines
1014}
1015
1016// formatRelatedFiles formats the related files list into a human-readable message
1017func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1018 if len(files) == 0 {
1019 return ""
1020 }
1021
1022 buf := new(strings.Builder)
1023
1024 fmt.Fprintf(buf, "Potentially related files:\n\n")
1025
1026 for _, file := range files {
1027 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1028 }
1029
1030 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1031 return buf.String()
1032}