blob: a4510b8c40f11367ca1b551c006a2c1340a24db6 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070024)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070029func (r *CodeReviewer) Tool() *llm.Tool {
30 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070031 Name: "codereview",
32 Description: `Run an automated code review.`,
33 // If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -070034 InputSchema: llm.EmptySchema(),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 Run: r.Run,
36 }
37 return spec
38}
39
Philip Zeyliger72252cb2025-05-10 17:00:08 -070040func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000041 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
42 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee2e463fb2025-04-17 11:22:22 -070043 if err := r.RequireNormalGitState(ctx); err != nil {
44 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070045 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070046 }
47 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
48 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070049 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070050 }
51
52 // Check that the current commit is not the initial commit
53 currentCommit, err := r.CurrentCommit(ctx)
54 if err != nil {
55 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070056 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070057 }
58 if r.IsInitialCommit(currentCommit) {
59 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Philip Zeyliger72252cb2025-05-10 17:00:08 -070060 return nil, fmt.Errorf("no new commits have been added, nothing to review")
Earl Lee2e463fb2025-04-17 11:22:22 -070061 }
62
63 // No matter what failures happen from here out, we will declare this to have been reviewed.
64 // This should help avoid the model getting blocked by a broken code review tool.
65 r.reviewed = append(r.reviewed, currentCommit)
66
Philip Zeyliger49edc922025-05-14 09:45:45 -070067 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -070068 if err != nil {
69 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070070 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070071 }
72
73 // Prepare to analyze before/after for the impacted files.
74 // We use the current commit to determine what packages exist and are impacted.
75 // The packages in the initial commit may be different.
76 // Good enough for now.
77 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000078 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070079 if err != nil {
80 // TODO: log and skip to stuff that doesn't require packages
81 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070082 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070083 }
84 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070085
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000086 var errorMessages []string // problems we want the model to address
87 var infoMessages []string // info the model should consider
88
89 // Find potentially related files that should also be considered
90 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
91 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
92 if err != nil {
93 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
94 } else {
95 relatedMsg := r.formatRelatedFiles(relatedFiles)
96 if relatedMsg != "" {
97 infoMessages = append(infoMessages, relatedMsg)
98 }
99 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700100
101 testMsg, err := r.checkTests(ctx, allPkgList)
102 if err != nil {
103 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700104 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700105 }
106 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000107 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700108 }
109
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000110 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700111 if err != nil {
112 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700113 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700114 }
115 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000116 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 }
118
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000119 // NOTE: If you change this output format, update the corresponding UI parsing in:
120 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000121 buf := new(strings.Builder)
122 if len(infoMessages) > 0 {
123 buf.WriteString("# Info\n\n")
124 buf.WriteString(strings.Join(infoMessages, "\n\n"))
125 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700126 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000127 if len(errorMessages) > 0 {
128 buf.WriteString("# Errors\n\n")
129 buf.WriteString(strings.Join(errorMessages, "\n\n"))
130 buf.WriteString("\n\nPlease fix before proceeding.\n")
131 }
132 if buf.Len() == 0 {
133 buf.WriteString("OK")
134 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700135 return llm.TextContent(buf.String()), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700136}
137
138func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
139 if r.initialWorktree != "" {
140 return nil
141 }
142 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
143 if err != nil {
144 return err
145 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700146 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700147 worktreeCmd.Dir = r.repoRoot
148 out, err := worktreeCmd.CombinedOutput()
149 if err != nil {
150 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
151 }
152 r.initialWorktree = tmpDir
153 return nil
154}
155
156func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000157 // 'gopls check' covers everything that 'go vet' covers.
158 // Disabling vet here speeds things up, and allows more precise filtering and reporting.
159 goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee2e463fb2025-04-17 11:22:22 -0700160 goTestArgs = append(goTestArgs, pkgList...)
161
162 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
163 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000164 afterTestOut, _ := afterTestCmd.Output()
165 // unfortunately, we can't short-circuit here even if all tests pass,
166 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700167
168 err := r.initializeInitialCommitWorktree(ctx)
169 if err != nil {
170 return "", err
171 }
172
173 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
174 beforeTestCmd.Dir = r.initialWorktree
175 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
176
177 // Parse the jsonl test results
178 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
179 if beforeParseErr != nil {
180 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
181 }
182 afterResults, afterParseErr := parseTestResults(afterTestOut)
183 if afterParseErr != nil {
184 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
185 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700186 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
187 if err != nil {
188 return "", fmt.Errorf("failed to compare test results: %w", err)
189 }
190 // TODO: better output formatting?
191 res := r.formatTestRegressions(testRegressions)
192 return res, nil
193}
194
Earl Lee2e463fb2025-04-17 11:22:22 -0700195// GoplsIssue represents a single issue reported by gopls check
196type GoplsIssue struct {
197 Position string // File position in format "file:line:col-range"
198 Message string // Description of the issue
199}
200
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000201// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
202var goplsIgnore = []string{
203 // these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
204 "ends with redundant newline",
205
206 // as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
207 "SplitSeq",
208}
209
Earl Lee2e463fb2025-04-17 11:22:22 -0700210// checkGopls runs gopls check on the provided files in both the current and initial state,
211// compares the results, and reports any new issues introduced in the current state.
212func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
213 if len(changedFiles) == 0 {
214 return "", nil // no files to check
215 }
216
217 // Filter out non-Go files as gopls only works on Go files
218 // and verify they still exist (not deleted)
219 var goFiles []string
220 for _, file := range changedFiles {
221 if !strings.HasSuffix(file, ".go") {
222 continue // not a Go file
223 }
224
225 // Check if the file still exists (not deleted)
226 if _, err := os.Stat(file); os.IsNotExist(err) {
227 continue // file doesn't exist anymore (deleted)
228 }
229
230 goFiles = append(goFiles, file)
231 }
232
233 if len(goFiles) == 0 {
234 return "", nil // no Go files to check
235 }
236
237 // Run gopls check on the current state
238 goplsArgs := append([]string{"check"}, goFiles...)
239
240 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
241 afterGoplsCmd.Dir = r.repoRoot
242 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
243 if err != nil {
244 // Check if the output looks like real gopls issues or if it's just error output
245 if !looksLikeGoplsIssues(afterGoplsOut) {
246 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
247 return "", nil // Skip rather than failing the entire code review
248 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700249 }
250
251 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000252 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700253
254 // If no issues were found, we're done
255 if len(afterIssues) == 0 {
256 return "", nil
257 }
258
259 // Gopls detected issues in the current state, check if they existed in the initial state
260 initErr := r.initializeInitialCommitWorktree(ctx)
261 if initErr != nil {
262 return "", err
263 }
264
265 // For each file that exists in the initial commit, run gopls check
266 var initialFilesToCheck []string
267 for _, file := range goFiles {
268 // Get relative path for git operations
269 relFile, err := filepath.Rel(r.repoRoot, file)
270 if err != nil {
271 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
272 continue
273 }
274
275 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700276 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700277 checkCmd.Dir = r.repoRoot
278 if err := checkCmd.Run(); err == nil {
279 // File exists in initial commit
280 initialFilePath := filepath.Join(r.initialWorktree, relFile)
281 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
282 }
283 }
284
285 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000286 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700287 if len(initialFilesToCheck) > 0 {
288 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
289 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
290 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000291 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700292 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
293 // If gopls fails to run properly on the initial commit, log a warning and continue
294 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000295 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700296 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000297 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700298 }
299 }
300
301 // Find new issues that weren't present in the initial state
302 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
303 if len(goplsRegressions) == 0 {
304 return "", nil // no new issues
305 }
306
307 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000308 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700309}
310
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000311// parseGoplsOutput parses the text output from gopls check.
312// It drops any that match the patterns in goplsIgnore.
Earl Lee2e463fb2025-04-17 11:22:22 -0700313// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000314func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700315 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000316 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700317 line = strings.TrimSpace(line)
318 if line == "" {
319 continue
320 }
321
322 // Skip lines that look like error messages rather than gopls issues
323 if strings.HasPrefix(line, "Error:") ||
324 strings.HasPrefix(line, "Failed:") ||
325 strings.HasPrefix(line, "Warning:") ||
326 strings.HasPrefix(line, "gopls:") {
327 continue
328 }
329
330 // Find the first colon that separates the file path from the line number
331 firstColonIdx := strings.Index(line, ":")
332 if firstColonIdx < 0 {
333 continue // Invalid format
334 }
335
336 // Verify the part before the first colon looks like a file path
337 potentialPath := line[:firstColonIdx]
338 if !strings.HasSuffix(potentialPath, ".go") {
339 continue // Not a Go file path
340 }
341
342 // Find the position of the first message separator ': '
343 // This separates the position info from the message
344 messageStart := strings.Index(line, ": ")
345 if messageStart < 0 || messageStart <= firstColonIdx {
346 continue // Invalid format
347 }
348
349 // Extract position and message
350 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000351 rel, err := filepath.Rel(root, position)
352 if err == nil {
353 position = rel
354 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700355 message := line[messageStart+2:] // Skip the ': ' separator
356
357 // Verify position has the expected format (at least 2 colons for line:col)
358 colonCount := strings.Count(position, ":")
359 if colonCount < 2 {
360 continue // Not enough position information
361 }
362
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000363 // Skip diagnostics that match any of our ignored patterns
364 if shouldIgnoreDiagnostic(message) {
365 continue
366 }
367
Earl Lee2e463fb2025-04-17 11:22:22 -0700368 issues = append(issues, GoplsIssue{
369 Position: position,
370 Message: message,
371 })
372 }
373
374 return issues
375}
376
377// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
378// rather than error messages about gopls itself failing
379func looksLikeGoplsIssues(output []byte) bool {
380 // If output is empty, it's not valid issues
381 if len(output) == 0 {
382 return false
383 }
384
385 // Check if output has at least one line that looks like a gopls issue
386 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000387 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700388 line = strings.TrimSpace(line)
389 if line == "" {
390 continue
391 }
392
393 // A gopls issue has at least two colons (file path, line number, column)
394 // and contains a colon followed by a space (separating position from message)
395 colonCount := strings.Count(line, ":")
396 hasSeparator := strings.Contains(line, ": ")
397
398 if colonCount >= 2 && hasSeparator {
399 // Check if it starts with a likely file path (ending in .go)
400 parts := strings.SplitN(line, ":", 2)
401 if strings.HasSuffix(parts[0], ".go") {
402 return true
403 }
404 }
405 }
406 return false
407}
408
409// normalizeGoplsPosition extracts just the file path from a position string
410func normalizeGoplsPosition(position string) string {
411 // Extract just the file path by taking everything before the first colon
412 parts := strings.Split(position, ":")
413 if len(parts) < 1 {
414 return position
415 }
416 return parts[0]
417}
418
419// findGoplsRegressions identifies gopls issues that are new in the after state
420func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
421 var regressions []GoplsIssue
422
423 // Build map of before issues for easier lookup
424 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
425 for _, issue := range before {
426 file := normalizeGoplsPosition(issue.Position)
427 if _, exists := beforeIssueMap[file]; !exists {
428 beforeIssueMap[file] = make(map[string]bool)
429 }
430 // Store both the exact message and the general issue type for fuzzy matching
431 beforeIssueMap[file][issue.Message] = true
432
433 // Extract the general issue type (everything before the first ':' in the message)
434 generalIssue := issue.Message
435 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
436 generalIssue = issue.Message[:colonIdx]
437 }
438 beforeIssueMap[file][generalIssue] = true
439 }
440
441 // Check each after issue to see if it's new
442 for _, afterIssue := range after {
443 file := normalizeGoplsPosition(afterIssue.Position)
444 isNew := true
445
446 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
447 // Check for exact message match
448 if fileIssues[afterIssue.Message] {
449 isNew = false
450 } else {
451 // Check for general issue type match
452 generalIssue := afterIssue.Message
453 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
454 generalIssue = afterIssue.Message[:colonIdx]
455 }
456 if fileIssues[generalIssue] {
457 isNew = false
458 }
459 }
460 }
461
462 if isNew {
463 regressions = append(regressions, afterIssue)
464 }
465 }
466
467 // Sort regressions for deterministic output
468 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
469 return strings.Compare(a.Position, b.Position)
470 })
471
472 return regressions
473}
474
475// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000476func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700477 if len(regressions) == 0 {
478 return ""
479 }
480
481 var sb strings.Builder
482 sb.WriteString("Gopls check issues detected:\n\n")
483
484 // Format each issue
485 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000486 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700487 }
488
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000489 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
490 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700491 return sb.String()
492}
493
494func (r *CodeReviewer) HasReviewed(commit string) bool {
495 return slices.Contains(r.reviewed, commit)
496}
497
498func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700499 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700500}
501
Philip Zeyliger49edc922025-05-14 09:45:45 -0700502// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000503// This serves two purposes:
504// - ensures we're not still on the initial commit
505// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700506func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000507 head, err := r.CurrentCommit(ctx)
508 if err != nil {
509 return err
510 }
511
512 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700513 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000514 cmd.Dir = r.repoRoot
515 err = cmd.Run()
516 if err != nil {
517 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
518 // Exit code 1 means not an ancestor
519 return fmt.Errorf("HEAD is not a descendant of the initial commit")
520 }
521 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
522 }
523 return nil
524}
525
Earl Lee2e463fb2025-04-17 11:22:22 -0700526// packagesForFiles returns maps of packages related to the given files:
527// 1. directPkgs: packages that directly contain the changed files
528// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
529// It may include false positives.
530// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000531func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700532 for _, f := range files {
533 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000534 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700535 }
536 }
537 cfg := &packages.Config{
538 Mode: packages.LoadImports | packages.NeedEmbedFiles,
539 Context: ctx,
540 // Logf: func(msg string, args ...any) {
541 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
542 // },
543 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
544 // We can cross that bridge when we get there.
545 Dir: r.repoRoot,
546 Tests: true,
547 }
548 universe, err := packages.Load(cfg, "./...")
549 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000550 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700551 }
552 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000553 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700554 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700555 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700556 for _, file := range files {
557 if pkgFiles[file] {
558 // prefer test packages, as they contain strictly more files (right?)
559 prev := directPkgs[pkg.PkgPath]
560 if prev == nil || prev.ForTest == "" {
561 directPkgs[pkg.PkgPath] = pkg
562 }
563 }
564 }
565 }
566
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000567 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700568
569 // Add packages that depend on the direct packages
570 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000571 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700572}
573
574// allFiles returns all files that might be referenced by the package.
575// It may contain false positives.
576func allFiles(p *packages.Package) map[string]bool {
577 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000578 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700579 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
580 for _, extra := range add {
581 for _, file := range extra {
582 files[file] = true
583 }
584 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000585 // Add files from testdata directory
586 testdataDir := filepath.Join(p.Dir, "testdata")
587 if _, err := os.Stat(testdataDir); err == nil {
588 fsys := os.DirFS(p.Dir)
589 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
590 if err == nil && !d.IsDir() {
591 files[filepath.Join(p.Dir, path)] = true
592 }
593 return nil
594 })
595 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700596 return files
597}
598
599// addDependentPackages adds to pkgs all packages from universe
600// that directly or indirectly depend on any package already in pkgs.
601func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
602 for {
603 changed := false
604 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000605 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
606 // skip test packages
607 continue
608 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700609 if _, ok := pkgs[p.PkgPath]; ok {
610 // already in pkgs
611 continue
612 }
613 for importPath := range p.Imports {
614 if _, ok := pkgs[importPath]; ok {
615 // imports a package dependent on pkgs, add it
616 pkgs[p.PkgPath] = p
617 changed = true
618 break
619 }
620 }
621 }
622 if !changed {
623 break
624 }
625 }
626}
627
628// testJSON is a union of BuildEvent and TestEvent
629type testJSON struct {
630 // TestEvent only:
631 // The Time field holds the time the event happened. It is conventionally omitted
632 // for cached test results.
633 Time time.Time `json:"Time"`
634 // BuildEvent only:
635 // The ImportPath field gives the package ID of the package being built.
636 // This matches the Package.ImportPath field of go list -json and the
637 // TestEvent.FailedBuild field of go test -json. Note that it does not
638 // match TestEvent.Package.
639 ImportPath string `json:"ImportPath"` // BuildEvent only
640 // TestEvent only:
641 // The Package field, if present, specifies the package being tested. When the
642 // go command runs parallel tests in -json mode, events from different tests are
643 // interlaced; the Package field allows readers to separate them.
644 Package string `json:"Package"`
645 // Action is used in both BuildEvent and TestEvent.
646 // It is the key to distinguishing between them.
647 // BuildEvent:
648 // build-output or build-fail
649 // TestEvent:
650 // start, run, pause, cont, pass, bench, fail, output, skip
651 Action string `json:"Action"`
652 // TestEvent only:
653 // The Test field, if present, specifies the test, example, or benchmark function
654 // that caused the event. Events for the overall package test do not set Test.
655 Test string `json:"Test"`
656 // TestEvent only:
657 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
658 // for the specific test or the overall package test that passed or failed.
659 Elapsed float64
660 // TestEvent:
661 // The Output field is set for Action == "output" and is a portion of the
662 // test's output (standard output and standard error merged together). The
663 // output is unmodified except that invalid UTF-8 output from a test is coerced
664 // into valid UTF-8 by use of replacement characters. With that one exception,
665 // the concatenation of the Output fields of all output events is the exact output
666 // of the test execution.
667 // BuildEvent:
668 // The Output field is set for Action == "build-output" and is a portion of
669 // the build's output. The concatenation of the Output fields of all output
670 // events is the exact output of the build. A single event may contain one
671 // or more lines of output and there may be more than one output event for
672 // a given ImportPath. This matches the definition of the TestEvent.Output
673 // field produced by go test -json.
674 Output string `json:"Output"`
675 // TestEvent only:
676 // The FailedBuild field is set for Action == "fail" if the test failure was caused
677 // by a build failure. It contains the package ID of the package that failed to
678 // build. This matches the ImportPath field of the "go list" output, as well as the
679 // BuildEvent.ImportPath field as emitted by "go build -json".
680 FailedBuild string `json:"FailedBuild"`
681}
682
683// parseTestResults converts test output in JSONL format into a slice of testJSON objects
684func parseTestResults(testOutput []byte) ([]testJSON, error) {
685 var results []testJSON
686 dec := json.NewDecoder(bytes.NewReader(testOutput))
687 for {
688 var event testJSON
689 if err := dec.Decode(&event); err != nil {
690 if err == io.EOF {
691 break
692 }
693 return nil, err
694 }
695 results = append(results, event)
696 }
697 return results, nil
698}
699
700// testStatus represents the status of a test in a given commit
701type testStatus int
702
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700703//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700704const (
705 testStatusUnknown testStatus = iota
706 testStatusPass
707 testStatusFail
708 testStatusBuildFail
709 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000710 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700711)
712
Earl Lee2e463fb2025-04-17 11:22:22 -0700713// testRegression represents a test that regressed between commits
714type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000715 Package string
716 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700717 BeforeStatus testStatus
718 AfterStatus testStatus
719 Output string // failure output in the after state
720}
721
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000722func (r *testRegression) Source() string {
723 if r.Test == "" {
724 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700725 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000726 return fmt.Sprintf("%s.%s", r.Package, r.Test)
727}
Earl Lee2e463fb2025-04-17 11:22:22 -0700728
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000729type packageResult struct {
730 Status testStatus // overall status for the package
731 TestStatus map[string]testStatus // name -> status
732 TestOutput map[string][]string // name -> output parts
733}
Earl Lee2e463fb2025-04-17 11:22:22 -0700734
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000735// collectTestStatuses processes a slice of test events and returns rich status information
736func collectTestStatuses(results []testJSON) map[string]*packageResult {
737 m := make(map[string]*packageResult)
738
739 for _, event := range results {
740 pkg := event.Package
741 p, ok := m[pkg]
742 if !ok {
743 p = new(packageResult)
744 p.TestStatus = make(map[string]testStatus)
745 p.TestOutput = make(map[string][]string)
746 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700747 }
748
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000749 switch event.Action {
750 case "output":
751 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700752 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700753 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000754 if event.Test == "" {
755 p.Status = testStatusPass
756 } else {
757 p.TestStatus[event.Test] = testStatusPass
758 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700759 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000760 if event.Test == "" {
761 if event.FailedBuild != "" {
762 p.Status = testStatusBuildFail
763 } else {
764 p.Status = testStatusFail
765 }
766 } else {
767 p.TestStatus[event.Test] = testStatusFail
768 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700769 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000770 if event.Test == "" {
771 p.Status = testStatusNoTests
772 } else {
773 p.TestStatus[event.Test] = testStatusSkip
774 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700775 }
776 }
777
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000778 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700779}
780
781// compareTestResults identifies tests that have regressed between commits
782func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000783 before := collectTestStatuses(beforeResults)
784 after := collectTestStatuses(afterResults)
785 var testLevelRegressions []testRegression
786 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700787
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000788 afterPkgs := slices.Sorted(maps.Keys(after))
789 for _, pkg := range afterPkgs {
790 afterResult := after[pkg]
791 afterStatus := afterResult.Status
792 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
793 beforeResult, ok := before[pkg]
794 beforeStatus := testStatusNoTests
795 if ok {
796 beforeStatus = beforeResult.Status
797 }
798 // If things no longer build, stop at the package level.
799 // Otherwise, proceed to the test level, so we have more precise information.
800 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
801 packageLevelRegressions = append(packageLevelRegressions, testRegression{
802 Package: pkg,
803 BeforeStatus: beforeStatus,
804 AfterStatus: afterStatus,
805 })
806 continue
807 }
808 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
809 for _, test := range tests {
810 afterStatus := afterResult.TestStatus[test]
811 switch afterStatus {
812 case testStatusPass:
813 continue
814 case testStatusUnknown:
815 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
816 continue
817 }
818 beforeStatus := beforeResult.TestStatus[test]
819 if isRegression(beforeStatus, afterStatus) {
820 testLevelRegressions = append(testLevelRegressions, testRegression{
821 Package: pkg,
822 Test: test,
823 BeforeStatus: beforeStatus,
824 AfterStatus: afterStatus,
825 Output: strings.Join(afterResult.TestOutput[test], ""),
826 })
827 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700828 }
829 }
830
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000831 // If we have test-level regressions, report only those
832 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700833 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000834 if len(testLevelRegressions) > 0 {
835 regressions = testLevelRegressions
836 } else {
837 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700838 }
839
840 // Sort regressions for consistent output
841 slices.SortFunc(regressions, func(a, b testRegression) int {
842 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000843 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700844 return c
845 }
846 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000847 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700848 })
849
850 return regressions, nil
851}
852
853// badnessLevels maps test status to a badness level
854// Higher values indicate worse status (more severe issues)
855var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000856 testStatusBuildFail: 5, // Worst
857 testStatusFail: 4,
858 testStatusSkip: 3,
859 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700860 testStatusPass: 1,
861 testStatusUnknown: 0, // Least bad - avoids false positives
862}
863
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000864// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
865type regressionKey struct {
866 before, after testStatus
867}
868
869var regressionMessages = map[regressionKey]string{
870 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
871 {testStatusUnknown, testStatusFail}: "New test is failing",
872 {testStatusUnknown, testStatusSkip}: "New test is skipped",
873 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
874 {testStatusPass, testStatusFail}: "Was passing, now failing",
875 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
876 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
877 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
878 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
879 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
880 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
881 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
882}
883
Earl Lee2e463fb2025-04-17 11:22:22 -0700884// isRegression determines if a test has regressed based on before and after status
885// A regression is defined as an increase in badness level
886func isRegression(before, after testStatus) bool {
887 // Higher badness level means worse status
888 return badnessLevels[after] > badnessLevels[before]
889}
890
891// formatTestRegressions generates a human-readable summary of test regressions
892func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
893 if len(regressions) == 0 {
894 return ""
895 }
896
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000897 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700898 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700899
900 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000901 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
902 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
903 message, exists := regressionMessages[key]
904 if !exists {
905 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700906 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000907 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700908 }
909
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000910 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700911}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000912
913// RelatedFile represents a file historically related to the changed files
914type RelatedFile struct {
915 Path string // Path to the file
916 Correlation float64 // Correlation score (0.0-1.0)
917}
918
919// findRelatedFiles identifies files that are historically related to the changed files
920// by analyzing git commit history for co-occurrences.
921func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
922 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
923 if err != nil {
924 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
925 }
926 if len(commits) == 0 {
927 return nil, nil
928 }
929
930 relChanged := make(map[string]bool, len(changedFiles))
931 for _, file := range changedFiles {
932 rel, err := filepath.Rel(r.repoRoot, file)
933 if err != nil {
934 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
935 }
936 relChanged[rel] = true
937 }
938
939 historyFiles := make(map[string]int)
940 var historyMu sync.Mutex
941
942 maxWorkers := runtime.GOMAXPROCS(0)
943 semaphore := make(chan bool, maxWorkers)
944 var wg sync.WaitGroup
945
946 for _, commit := range commits {
947 wg.Add(1)
948 semaphore <- true // acquire
949
950 go func(commit string) {
951 defer wg.Done()
952 defer func() { <-semaphore }() // release
953 commitFiles, err := r.getFilesInCommit(ctx, commit)
954 if err != nil {
955 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
956 return
957 }
958 incr := 0
959 for _, file := range commitFiles {
960 if relChanged[file] {
961 incr++
962 }
963 }
964 if incr == 0 {
965 return
966 }
967 historyMu.Lock()
968 defer historyMu.Unlock()
969 for _, file := range commitFiles {
970 historyFiles[file] += incr
971 }
972 }(commit)
973 }
974 wg.Wait()
975
976 // normalize
977 maxCount := 0
978 for _, count := range historyFiles {
979 maxCount = max(maxCount, count)
980 }
981 if maxCount == 0 {
982 return nil, nil
983 }
984
985 var relatedFiles []RelatedFile
986 for file, count := range historyFiles {
987 if relChanged[file] {
988 // Don't include inputs in the output.
989 continue
990 }
991 correlation := float64(count) / float64(maxCount)
992 // Require min correlation to avoid noise
993 if correlation >= 0.1 {
Pokey Rule75f93a12025-05-15 13:51:55 +0000994 // Check if the file still exists in the repository
995 fullPath := filepath.Join(r.repoRoot, file)
996 if _, err := os.Stat(fullPath); err == nil {
997 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
998 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000999 }
1000 }
1001
1002 // Highest correlation first
1003 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
1004 return -1 * cmp.Compare(a.Correlation, b.Correlation)
1005 })
1006
1007 // Limit to 1 correlated file per input file.
1008 // (Arbitrary limit, to be adjusted.)
1009 maxFiles := len(changedFiles)
1010 if len(relatedFiles) > maxFiles {
1011 relatedFiles = relatedFiles[:maxFiles]
1012 }
1013
1014 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1015 // and then increase the strength of the wording in the relatedFiles message.
1016
1017 return relatedFiles, nil
1018}
1019
1020// getCommitsTouchingFiles returns all commits that touch any of the specified files
1021func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1022 if len(files) == 0 {
1023 return nil, nil
1024 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001025 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001026 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1027 cmd.Dir = r.repoRoot
1028 out, err := cmd.CombinedOutput()
1029 if err != nil {
1030 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1031 }
1032 return nonEmptyTrimmedLines(out), nil
1033}
1034
1035// getFilesInCommit returns all files changed in a specific commit
1036func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1037 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1038 cmd.Dir = r.repoRoot
1039 out, err := cmd.CombinedOutput()
1040 if err != nil {
1041 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1042 }
1043 return nonEmptyTrimmedLines(out), nil
1044}
1045
1046func nonEmptyTrimmedLines(b []byte) []string {
1047 var lines []string
1048 for line := range strings.Lines(string(b)) {
1049 line = strings.TrimSpace(line)
1050 if line != "" {
1051 lines = append(lines, line)
1052 }
1053 }
1054 return lines
1055}
1056
1057// formatRelatedFiles formats the related files list into a human-readable message
1058func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1059 if len(files) == 0 {
1060 return ""
1061 }
1062
1063 buf := new(strings.Builder)
1064
1065 fmt.Fprintf(buf, "Potentially related files:\n\n")
1066
1067 for _, file := range files {
1068 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1069 }
1070
1071 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1072 return buf.String()
1073}
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +00001074
1075// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
1076func shouldIgnoreDiagnostic(message string) bool {
1077 for _, pattern := range goplsIgnore {
1078 if strings.Contains(message, pattern) {
1079 return true
1080 }
1081 }
1082 return false
1083}