blob: 25defa4ef68f96bb8a74f9d324170c03928474c8 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070024)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070029func (r *CodeReviewer) Tool() *llm.Tool {
30 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070031 Name: "codereview",
Josh Bleecher Snyder272c4a92025-06-04 16:08:42 -070032 Description: `Run an automated code review before presenting git commits to the user. Call if/when you've completed your current work and are ready for user feedback.`,
Earl Lee2e463fb2025-04-17 11:22:22 -070033 // If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -070034 InputSchema: llm.EmptySchema(),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 Run: r.Run,
36 }
37 return spec
38}
39
Philip Zeyliger72252cb2025-05-10 17:00:08 -070040func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000041 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
42 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee2e463fb2025-04-17 11:22:22 -070043 if err := r.RequireNormalGitState(ctx); err != nil {
44 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070045 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070046 }
47 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
48 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070049 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070050 }
51
52 // Check that the current commit is not the initial commit
53 currentCommit, err := r.CurrentCommit(ctx)
54 if err != nil {
55 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070056 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070057 }
58 if r.IsInitialCommit(currentCommit) {
59 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Philip Zeyliger72252cb2025-05-10 17:00:08 -070060 return nil, fmt.Errorf("no new commits have been added, nothing to review")
Earl Lee2e463fb2025-04-17 11:22:22 -070061 }
62
63 // No matter what failures happen from here out, we will declare this to have been reviewed.
64 // This should help avoid the model getting blocked by a broken code review tool.
65 r.reviewed = append(r.reviewed, currentCommit)
66
Philip Zeyliger49edc922025-05-14 09:45:45 -070067 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -070068 if err != nil {
69 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070070 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070071 }
72
73 // Prepare to analyze before/after for the impacted files.
74 // We use the current commit to determine what packages exist and are impacted.
75 // The packages in the initial commit may be different.
76 // Good enough for now.
77 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000078 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070079 if err != nil {
80 // TODO: log and skip to stuff that doesn't require packages
81 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070082 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070083 }
84 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070085
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000086 var errorMessages []string // problems we want the model to address
87 var infoMessages []string // info the model should consider
88
Josh Bleecher Snydercf191902025-06-04 18:18:40 +000089 // Run 'go generate' early, so that it can potentially fix tests that would otherwise fail.
90 generateChanges, err := r.runGenerate(ctx, allPkgList)
91 if err != nil {
92 errorMessages = append(errorMessages, err.Error())
93 }
94 if len(generateChanges) > 0 {
95 buf := new(strings.Builder)
96 buf.WriteString("The following files were changed by running `go generate`:\n\n")
97 for _, f := range generateChanges {
98 buf.WriteString(f)
99 buf.WriteString("\n")
100 }
101 buf.WriteString("\nPlease amend your latest git commit with these changes.\n")
102 infoMessages = append(infoMessages, buf.String())
103 }
104
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000105 // Find potentially related files that should also be considered
106 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000107 // TODO: arrange for this to run even in non-Go repos!
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000108 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
109 if err != nil {
110 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
111 } else {
112 relatedMsg := r.formatRelatedFiles(relatedFiles)
113 if relatedMsg != "" {
114 infoMessages = append(infoMessages, relatedMsg)
115 }
116 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700117
118 testMsg, err := r.checkTests(ctx, allPkgList)
119 if err != nil {
120 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700121 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700122 }
123 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000124 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700125 }
126
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000127 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700128 if err != nil {
129 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700130 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700131 }
132 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000133 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700134 }
135
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000136 // NOTE: If you change this output format, update the corresponding UI parsing in:
137 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000138 buf := new(strings.Builder)
139 if len(infoMessages) > 0 {
140 buf.WriteString("# Info\n\n")
141 buf.WriteString(strings.Join(infoMessages, "\n\n"))
142 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700143 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000144 if len(errorMessages) > 0 {
145 buf.WriteString("# Errors\n\n")
146 buf.WriteString(strings.Join(errorMessages, "\n\n"))
147 buf.WriteString("\n\nPlease fix before proceeding.\n")
148 }
149 if buf.Len() == 0 {
150 buf.WriteString("OK")
151 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700152 return llm.TextContent(buf.String()), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700153}
154
155func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
156 if r.initialWorktree != "" {
157 return nil
158 }
159 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
160 if err != nil {
161 return err
162 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700163 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700164 worktreeCmd.Dir = r.repoRoot
165 out, err := worktreeCmd.CombinedOutput()
166 if err != nil {
167 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
168 }
169 r.initialWorktree = tmpDir
170 return nil
171}
172
173func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000174 // 'gopls check' covers everything that 'go vet' covers.
175 // Disabling vet here speeds things up, and allows more precise filtering and reporting.
176 goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee2e463fb2025-04-17 11:22:22 -0700177 goTestArgs = append(goTestArgs, pkgList...)
178
179 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
180 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000181 afterTestOut, _ := afterTestCmd.Output()
182 // unfortunately, we can't short-circuit here even if all tests pass,
183 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700184
185 err := r.initializeInitialCommitWorktree(ctx)
186 if err != nil {
187 return "", err
188 }
189
190 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
191 beforeTestCmd.Dir = r.initialWorktree
192 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
193
194 // Parse the jsonl test results
195 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
196 if beforeParseErr != nil {
197 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
198 }
199 afterResults, afterParseErr := parseTestResults(afterTestOut)
200 if afterParseErr != nil {
201 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
202 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700203 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
204 if err != nil {
205 return "", fmt.Errorf("failed to compare test results: %w", err)
206 }
207 // TODO: better output formatting?
208 res := r.formatTestRegressions(testRegressions)
209 return res, nil
210}
211
Earl Lee2e463fb2025-04-17 11:22:22 -0700212// GoplsIssue represents a single issue reported by gopls check
213type GoplsIssue struct {
214 Position string // File position in format "file:line:col-range"
215 Message string // Description of the issue
216}
217
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000218// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
219var goplsIgnore = []string{
220 // these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
221 "ends with redundant newline",
222
223 // as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
224 "SplitSeq",
225}
226
Earl Lee2e463fb2025-04-17 11:22:22 -0700227// checkGopls runs gopls check on the provided files in both the current and initial state,
228// compares the results, and reports any new issues introduced in the current state.
229func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
230 if len(changedFiles) == 0 {
231 return "", nil // no files to check
232 }
233
234 // Filter out non-Go files as gopls only works on Go files
235 // and verify they still exist (not deleted)
236 var goFiles []string
237 for _, file := range changedFiles {
238 if !strings.HasSuffix(file, ".go") {
239 continue // not a Go file
240 }
241
242 // Check if the file still exists (not deleted)
243 if _, err := os.Stat(file); os.IsNotExist(err) {
244 continue // file doesn't exist anymore (deleted)
245 }
246
247 goFiles = append(goFiles, file)
248 }
249
250 if len(goFiles) == 0 {
251 return "", nil // no Go files to check
252 }
253
254 // Run gopls check on the current state
255 goplsArgs := append([]string{"check"}, goFiles...)
256
257 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
258 afterGoplsCmd.Dir = r.repoRoot
259 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
260 if err != nil {
261 // Check if the output looks like real gopls issues or if it's just error output
262 if !looksLikeGoplsIssues(afterGoplsOut) {
263 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
264 return "", nil // Skip rather than failing the entire code review
265 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700266 }
267
268 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000269 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700270
271 // If no issues were found, we're done
272 if len(afterIssues) == 0 {
273 return "", nil
274 }
275
276 // Gopls detected issues in the current state, check if they existed in the initial state
277 initErr := r.initializeInitialCommitWorktree(ctx)
278 if initErr != nil {
279 return "", err
280 }
281
282 // For each file that exists in the initial commit, run gopls check
283 var initialFilesToCheck []string
284 for _, file := range goFiles {
285 // Get relative path for git operations
286 relFile, err := filepath.Rel(r.repoRoot, file)
287 if err != nil {
288 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
289 continue
290 }
291
292 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700293 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700294 checkCmd.Dir = r.repoRoot
295 if err := checkCmd.Run(); err == nil {
296 // File exists in initial commit
297 initialFilePath := filepath.Join(r.initialWorktree, relFile)
298 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
299 }
300 }
301
302 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000303 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700304 if len(initialFilesToCheck) > 0 {
305 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
306 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
307 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000308 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700309 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
310 // If gopls fails to run properly on the initial commit, log a warning and continue
311 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000312 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700313 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000314 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700315 }
316 }
317
318 // Find new issues that weren't present in the initial state
319 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
320 if len(goplsRegressions) == 0 {
321 return "", nil // no new issues
322 }
323
324 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000325 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700326}
327
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000328// parseGoplsOutput parses the text output from gopls check.
329// It drops any that match the patterns in goplsIgnore.
Earl Lee2e463fb2025-04-17 11:22:22 -0700330// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000331func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700332 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000333 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700334 line = strings.TrimSpace(line)
335 if line == "" {
336 continue
337 }
338
339 // Skip lines that look like error messages rather than gopls issues
340 if strings.HasPrefix(line, "Error:") ||
341 strings.HasPrefix(line, "Failed:") ||
342 strings.HasPrefix(line, "Warning:") ||
343 strings.HasPrefix(line, "gopls:") {
344 continue
345 }
346
347 // Find the first colon that separates the file path from the line number
348 firstColonIdx := strings.Index(line, ":")
349 if firstColonIdx < 0 {
350 continue // Invalid format
351 }
352
353 // Verify the part before the first colon looks like a file path
354 potentialPath := line[:firstColonIdx]
355 if !strings.HasSuffix(potentialPath, ".go") {
356 continue // Not a Go file path
357 }
358
359 // Find the position of the first message separator ': '
360 // This separates the position info from the message
361 messageStart := strings.Index(line, ": ")
362 if messageStart < 0 || messageStart <= firstColonIdx {
363 continue // Invalid format
364 }
365
366 // Extract position and message
367 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000368 rel, err := filepath.Rel(root, position)
369 if err == nil {
370 position = rel
371 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700372 message := line[messageStart+2:] // Skip the ': ' separator
373
374 // Verify position has the expected format (at least 2 colons for line:col)
375 colonCount := strings.Count(position, ":")
376 if colonCount < 2 {
377 continue // Not enough position information
378 }
379
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000380 // Skip diagnostics that match any of our ignored patterns
381 if shouldIgnoreDiagnostic(message) {
382 continue
383 }
384
Earl Lee2e463fb2025-04-17 11:22:22 -0700385 issues = append(issues, GoplsIssue{
386 Position: position,
387 Message: message,
388 })
389 }
390
391 return issues
392}
393
394// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
395// rather than error messages about gopls itself failing
396func looksLikeGoplsIssues(output []byte) bool {
397 // If output is empty, it's not valid issues
398 if len(output) == 0 {
399 return false
400 }
401
402 // Check if output has at least one line that looks like a gopls issue
403 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000404 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700405 line = strings.TrimSpace(line)
406 if line == "" {
407 continue
408 }
409
410 // A gopls issue has at least two colons (file path, line number, column)
411 // and contains a colon followed by a space (separating position from message)
412 colonCount := strings.Count(line, ":")
413 hasSeparator := strings.Contains(line, ": ")
414
415 if colonCount >= 2 && hasSeparator {
416 // Check if it starts with a likely file path (ending in .go)
417 parts := strings.SplitN(line, ":", 2)
418 if strings.HasSuffix(parts[0], ".go") {
419 return true
420 }
421 }
422 }
423 return false
424}
425
426// normalizeGoplsPosition extracts just the file path from a position string
427func normalizeGoplsPosition(position string) string {
428 // Extract just the file path by taking everything before the first colon
429 parts := strings.Split(position, ":")
430 if len(parts) < 1 {
431 return position
432 }
433 return parts[0]
434}
435
436// findGoplsRegressions identifies gopls issues that are new in the after state
437func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
438 var regressions []GoplsIssue
439
440 // Build map of before issues for easier lookup
441 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
442 for _, issue := range before {
443 file := normalizeGoplsPosition(issue.Position)
444 if _, exists := beforeIssueMap[file]; !exists {
445 beforeIssueMap[file] = make(map[string]bool)
446 }
447 // Store both the exact message and the general issue type for fuzzy matching
448 beforeIssueMap[file][issue.Message] = true
449
450 // Extract the general issue type (everything before the first ':' in the message)
451 generalIssue := issue.Message
452 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
453 generalIssue = issue.Message[:colonIdx]
454 }
455 beforeIssueMap[file][generalIssue] = true
456 }
457
458 // Check each after issue to see if it's new
459 for _, afterIssue := range after {
460 file := normalizeGoplsPosition(afterIssue.Position)
461 isNew := true
462
463 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
464 // Check for exact message match
465 if fileIssues[afterIssue.Message] {
466 isNew = false
467 } else {
468 // Check for general issue type match
469 generalIssue := afterIssue.Message
470 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
471 generalIssue = afterIssue.Message[:colonIdx]
472 }
473 if fileIssues[generalIssue] {
474 isNew = false
475 }
476 }
477 }
478
479 if isNew {
480 regressions = append(regressions, afterIssue)
481 }
482 }
483
484 // Sort regressions for deterministic output
485 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
486 return strings.Compare(a.Position, b.Position)
487 })
488
489 return regressions
490}
491
492// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000493func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700494 if len(regressions) == 0 {
495 return ""
496 }
497
498 var sb strings.Builder
499 sb.WriteString("Gopls check issues detected:\n\n")
500
501 // Format each issue
502 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000503 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700504 }
505
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000506 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
507 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700508 return sb.String()
509}
510
511func (r *CodeReviewer) HasReviewed(commit string) bool {
512 return slices.Contains(r.reviewed, commit)
513}
514
515func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700516 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700517}
518
Philip Zeyliger49edc922025-05-14 09:45:45 -0700519// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000520// This serves two purposes:
521// - ensures we're not still on the initial commit
522// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700523func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000524 head, err := r.CurrentCommit(ctx)
525 if err != nil {
526 return err
527 }
528
529 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700530 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000531 cmd.Dir = r.repoRoot
532 err = cmd.Run()
533 if err != nil {
534 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
535 // Exit code 1 means not an ancestor
536 return fmt.Errorf("HEAD is not a descendant of the initial commit")
537 }
538 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
539 }
540 return nil
541}
542
Earl Lee2e463fb2025-04-17 11:22:22 -0700543// packagesForFiles returns maps of packages related to the given files:
544// 1. directPkgs: packages that directly contain the changed files
545// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
546// It may include false positives.
547// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000548func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700549 for _, f := range files {
550 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000551 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700552 }
553 }
554 cfg := &packages.Config{
555 Mode: packages.LoadImports | packages.NeedEmbedFiles,
556 Context: ctx,
557 // Logf: func(msg string, args ...any) {
558 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
559 // },
560 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
561 // We can cross that bridge when we get there.
562 Dir: r.repoRoot,
563 Tests: true,
564 }
565 universe, err := packages.Load(cfg, "./...")
566 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000567 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700568 }
569 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000570 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700571 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700572 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700573 for _, file := range files {
574 if pkgFiles[file] {
575 // prefer test packages, as they contain strictly more files (right?)
576 prev := directPkgs[pkg.PkgPath]
577 if prev == nil || prev.ForTest == "" {
578 directPkgs[pkg.PkgPath] = pkg
579 }
580 }
581 }
582 }
583
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000584 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700585
586 // Add packages that depend on the direct packages
587 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000588 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700589}
590
591// allFiles returns all files that might be referenced by the package.
592// It may contain false positives.
593func allFiles(p *packages.Package) map[string]bool {
594 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000595 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700596 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
597 for _, extra := range add {
598 for _, file := range extra {
599 files[file] = true
600 }
601 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000602 // Add files from testdata directory
603 testdataDir := filepath.Join(p.Dir, "testdata")
604 if _, err := os.Stat(testdataDir); err == nil {
605 fsys := os.DirFS(p.Dir)
606 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
607 if err == nil && !d.IsDir() {
608 files[filepath.Join(p.Dir, path)] = true
609 }
610 return nil
611 })
612 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700613 return files
614}
615
616// addDependentPackages adds to pkgs all packages from universe
617// that directly or indirectly depend on any package already in pkgs.
618func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
619 for {
620 changed := false
621 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000622 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
623 // skip test packages
624 continue
625 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700626 if _, ok := pkgs[p.PkgPath]; ok {
627 // already in pkgs
628 continue
629 }
630 for importPath := range p.Imports {
631 if _, ok := pkgs[importPath]; ok {
632 // imports a package dependent on pkgs, add it
633 pkgs[p.PkgPath] = p
634 changed = true
635 break
636 }
637 }
638 }
639 if !changed {
640 break
641 }
642 }
643}
644
645// testJSON is a union of BuildEvent and TestEvent
646type testJSON struct {
647 // TestEvent only:
648 // The Time field holds the time the event happened. It is conventionally omitted
649 // for cached test results.
650 Time time.Time `json:"Time"`
651 // BuildEvent only:
652 // The ImportPath field gives the package ID of the package being built.
653 // This matches the Package.ImportPath field of go list -json and the
654 // TestEvent.FailedBuild field of go test -json. Note that it does not
655 // match TestEvent.Package.
656 ImportPath string `json:"ImportPath"` // BuildEvent only
657 // TestEvent only:
658 // The Package field, if present, specifies the package being tested. When the
659 // go command runs parallel tests in -json mode, events from different tests are
660 // interlaced; the Package field allows readers to separate them.
661 Package string `json:"Package"`
662 // Action is used in both BuildEvent and TestEvent.
663 // It is the key to distinguishing between them.
664 // BuildEvent:
665 // build-output or build-fail
666 // TestEvent:
667 // start, run, pause, cont, pass, bench, fail, output, skip
668 Action string `json:"Action"`
669 // TestEvent only:
670 // The Test field, if present, specifies the test, example, or benchmark function
671 // that caused the event. Events for the overall package test do not set Test.
672 Test string `json:"Test"`
673 // TestEvent only:
674 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
675 // for the specific test or the overall package test that passed or failed.
676 Elapsed float64
677 // TestEvent:
678 // The Output field is set for Action == "output" and is a portion of the
679 // test's output (standard output and standard error merged together). The
680 // output is unmodified except that invalid UTF-8 output from a test is coerced
681 // into valid UTF-8 by use of replacement characters. With that one exception,
682 // the concatenation of the Output fields of all output events is the exact output
683 // of the test execution.
684 // BuildEvent:
685 // The Output field is set for Action == "build-output" and is a portion of
686 // the build's output. The concatenation of the Output fields of all output
687 // events is the exact output of the build. A single event may contain one
688 // or more lines of output and there may be more than one output event for
689 // a given ImportPath. This matches the definition of the TestEvent.Output
690 // field produced by go test -json.
691 Output string `json:"Output"`
692 // TestEvent only:
693 // The FailedBuild field is set for Action == "fail" if the test failure was caused
694 // by a build failure. It contains the package ID of the package that failed to
695 // build. This matches the ImportPath field of the "go list" output, as well as the
696 // BuildEvent.ImportPath field as emitted by "go build -json".
697 FailedBuild string `json:"FailedBuild"`
698}
699
700// parseTestResults converts test output in JSONL format into a slice of testJSON objects
701func parseTestResults(testOutput []byte) ([]testJSON, error) {
702 var results []testJSON
703 dec := json.NewDecoder(bytes.NewReader(testOutput))
704 for {
705 var event testJSON
706 if err := dec.Decode(&event); err != nil {
707 if err == io.EOF {
708 break
709 }
710 return nil, err
711 }
712 results = append(results, event)
713 }
714 return results, nil
715}
716
717// testStatus represents the status of a test in a given commit
718type testStatus int
719
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700720//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700721const (
722 testStatusUnknown testStatus = iota
723 testStatusPass
724 testStatusFail
725 testStatusBuildFail
726 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000727 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700728)
729
Earl Lee2e463fb2025-04-17 11:22:22 -0700730// testRegression represents a test that regressed between commits
731type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000732 Package string
733 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700734 BeforeStatus testStatus
735 AfterStatus testStatus
736 Output string // failure output in the after state
737}
738
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000739func (r *testRegression) Source() string {
740 if r.Test == "" {
741 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700742 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000743 return fmt.Sprintf("%s.%s", r.Package, r.Test)
744}
Earl Lee2e463fb2025-04-17 11:22:22 -0700745
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000746type packageResult struct {
747 Status testStatus // overall status for the package
748 TestStatus map[string]testStatus // name -> status
749 TestOutput map[string][]string // name -> output parts
750}
Earl Lee2e463fb2025-04-17 11:22:22 -0700751
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000752// collectTestStatuses processes a slice of test events and returns rich status information
753func collectTestStatuses(results []testJSON) map[string]*packageResult {
754 m := make(map[string]*packageResult)
755
756 for _, event := range results {
757 pkg := event.Package
758 p, ok := m[pkg]
759 if !ok {
760 p = new(packageResult)
761 p.TestStatus = make(map[string]testStatus)
762 p.TestOutput = make(map[string][]string)
763 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700764 }
765
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000766 switch event.Action {
767 case "output":
768 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700769 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700770 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000771 if event.Test == "" {
772 p.Status = testStatusPass
773 } else {
774 p.TestStatus[event.Test] = testStatusPass
775 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700776 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000777 if event.Test == "" {
778 if event.FailedBuild != "" {
779 p.Status = testStatusBuildFail
780 } else {
781 p.Status = testStatusFail
782 }
783 } else {
784 p.TestStatus[event.Test] = testStatusFail
785 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700786 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000787 if event.Test == "" {
788 p.Status = testStatusNoTests
789 } else {
790 p.TestStatus[event.Test] = testStatusSkip
791 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700792 }
793 }
794
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000795 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700796}
797
798// compareTestResults identifies tests that have regressed between commits
799func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000800 before := collectTestStatuses(beforeResults)
801 after := collectTestStatuses(afterResults)
802 var testLevelRegressions []testRegression
803 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700804
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000805 afterPkgs := slices.Sorted(maps.Keys(after))
806 for _, pkg := range afterPkgs {
807 afterResult := after[pkg]
808 afterStatus := afterResult.Status
809 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
810 beforeResult, ok := before[pkg]
811 beforeStatus := testStatusNoTests
812 if ok {
813 beforeStatus = beforeResult.Status
814 }
815 // If things no longer build, stop at the package level.
816 // Otherwise, proceed to the test level, so we have more precise information.
817 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
818 packageLevelRegressions = append(packageLevelRegressions, testRegression{
819 Package: pkg,
820 BeforeStatus: beforeStatus,
821 AfterStatus: afterStatus,
822 })
823 continue
824 }
825 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
826 for _, test := range tests {
827 afterStatus := afterResult.TestStatus[test]
828 switch afterStatus {
829 case testStatusPass:
830 continue
831 case testStatusUnknown:
832 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
833 continue
834 }
835 beforeStatus := beforeResult.TestStatus[test]
836 if isRegression(beforeStatus, afterStatus) {
837 testLevelRegressions = append(testLevelRegressions, testRegression{
838 Package: pkg,
839 Test: test,
840 BeforeStatus: beforeStatus,
841 AfterStatus: afterStatus,
842 Output: strings.Join(afterResult.TestOutput[test], ""),
843 })
844 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700845 }
846 }
847
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000848 // If we have test-level regressions, report only those
849 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700850 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000851 if len(testLevelRegressions) > 0 {
852 regressions = testLevelRegressions
853 } else {
854 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700855 }
856
857 // Sort regressions for consistent output
858 slices.SortFunc(regressions, func(a, b testRegression) int {
859 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000860 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700861 return c
862 }
863 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000864 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700865 })
866
867 return regressions, nil
868}
869
870// badnessLevels maps test status to a badness level
871// Higher values indicate worse status (more severe issues)
872var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000873 testStatusBuildFail: 5, // Worst
874 testStatusFail: 4,
875 testStatusSkip: 3,
876 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700877 testStatusPass: 1,
878 testStatusUnknown: 0, // Least bad - avoids false positives
879}
880
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000881// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
882type regressionKey struct {
883 before, after testStatus
884}
885
886var regressionMessages = map[regressionKey]string{
887 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
888 {testStatusUnknown, testStatusFail}: "New test is failing",
889 {testStatusUnknown, testStatusSkip}: "New test is skipped",
890 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
891 {testStatusPass, testStatusFail}: "Was passing, now failing",
892 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
893 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
894 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
895 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
896 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
897 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
898 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
899}
900
Earl Lee2e463fb2025-04-17 11:22:22 -0700901// isRegression determines if a test has regressed based on before and after status
902// A regression is defined as an increase in badness level
903func isRegression(before, after testStatus) bool {
904 // Higher badness level means worse status
905 return badnessLevels[after] > badnessLevels[before]
906}
907
908// formatTestRegressions generates a human-readable summary of test regressions
909func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
910 if len(regressions) == 0 {
911 return ""
912 }
913
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000914 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700915 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700916
917 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000918 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
919 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
920 message, exists := regressionMessages[key]
921 if !exists {
922 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700923 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000924 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700925 }
926
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000927 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700928}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000929
930// RelatedFile represents a file historically related to the changed files
931type RelatedFile struct {
932 Path string // Path to the file
933 Correlation float64 // Correlation score (0.0-1.0)
934}
935
936// findRelatedFiles identifies files that are historically related to the changed files
937// by analyzing git commit history for co-occurrences.
938func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
939 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
940 if err != nil {
941 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
942 }
943 if len(commits) == 0 {
944 return nil, nil
945 }
946
947 relChanged := make(map[string]bool, len(changedFiles))
948 for _, file := range changedFiles {
949 rel, err := filepath.Rel(r.repoRoot, file)
950 if err != nil {
951 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
952 }
953 relChanged[rel] = true
954 }
955
956 historyFiles := make(map[string]int)
957 var historyMu sync.Mutex
958
959 maxWorkers := runtime.GOMAXPROCS(0)
960 semaphore := make(chan bool, maxWorkers)
961 var wg sync.WaitGroup
962
963 for _, commit := range commits {
964 wg.Add(1)
965 semaphore <- true // acquire
966
967 go func(commit string) {
968 defer wg.Done()
969 defer func() { <-semaphore }() // release
970 commitFiles, err := r.getFilesInCommit(ctx, commit)
971 if err != nil {
972 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
973 return
974 }
975 incr := 0
976 for _, file := range commitFiles {
977 if relChanged[file] {
978 incr++
979 }
980 }
981 if incr == 0 {
982 return
983 }
984 historyMu.Lock()
985 defer historyMu.Unlock()
986 for _, file := range commitFiles {
987 historyFiles[file] += incr
988 }
989 }(commit)
990 }
991 wg.Wait()
992
993 // normalize
994 maxCount := 0
995 for _, count := range historyFiles {
996 maxCount = max(maxCount, count)
997 }
998 if maxCount == 0 {
999 return nil, nil
1000 }
1001
1002 var relatedFiles []RelatedFile
1003 for file, count := range historyFiles {
1004 if relChanged[file] {
1005 // Don't include inputs in the output.
1006 continue
1007 }
1008 correlation := float64(count) / float64(maxCount)
1009 // Require min correlation to avoid noise
1010 if correlation >= 0.1 {
Pokey Rule75f93a12025-05-15 13:51:55 +00001011 // Check if the file still exists in the repository
1012 fullPath := filepath.Join(r.repoRoot, file)
1013 if _, err := os.Stat(fullPath); err == nil {
1014 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
1015 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001016 }
1017 }
1018
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001019 // Highest correlation first, then sort by path.
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001020 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001021 return cmp.Or(
1022 -1*cmp.Compare(a.Correlation, b.Correlation),
1023 cmp.Compare(a.Path, b.Path),
1024 )
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001025 })
1026
1027 // Limit to 1 correlated file per input file.
1028 // (Arbitrary limit, to be adjusted.)
1029 maxFiles := len(changedFiles)
1030 if len(relatedFiles) > maxFiles {
1031 relatedFiles = relatedFiles[:maxFiles]
1032 }
1033
1034 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1035 // and then increase the strength of the wording in the relatedFiles message.
1036
1037 return relatedFiles, nil
1038}
1039
1040// getCommitsTouchingFiles returns all commits that touch any of the specified files
1041func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1042 if len(files) == 0 {
1043 return nil, nil
1044 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001045 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001046 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1047 cmd.Dir = r.repoRoot
1048 out, err := cmd.CombinedOutput()
1049 if err != nil {
1050 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1051 }
1052 return nonEmptyTrimmedLines(out), nil
1053}
1054
1055// getFilesInCommit returns all files changed in a specific commit
1056func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1057 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1058 cmd.Dir = r.repoRoot
1059 out, err := cmd.CombinedOutput()
1060 if err != nil {
1061 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1062 }
1063 return nonEmptyTrimmedLines(out), nil
1064}
1065
1066func nonEmptyTrimmedLines(b []byte) []string {
1067 var lines []string
1068 for line := range strings.Lines(string(b)) {
1069 line = strings.TrimSpace(line)
1070 if line != "" {
1071 lines = append(lines, line)
1072 }
1073 }
1074 return lines
1075}
1076
1077// formatRelatedFiles formats the related files list into a human-readable message
1078func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1079 if len(files) == 0 {
1080 return ""
1081 }
1082
1083 buf := new(strings.Builder)
1084
1085 fmt.Fprintf(buf, "Potentially related files:\n\n")
1086
1087 for _, file := range files {
1088 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1089 }
1090
1091 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1092 return buf.String()
1093}
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +00001094
1095// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
1096func shouldIgnoreDiagnostic(message string) bool {
1097 for _, pattern := range goplsIgnore {
1098 if strings.Contains(message, pattern) {
1099 return true
1100 }
1101 }
1102 return false
1103}