blob: 44c0ecb939eab542b701fb87f7e64dd181076413 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070024)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070029func (r *CodeReviewer) Tool() *llm.Tool {
30 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070031 Name: "codereview",
32 Description: `Run an automated code review.`,
33 // If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -070034 InputSchema: llm.EmptySchema(),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 Run: r.Run,
36 }
37 return spec
38}
39
Philip Zeyliger72252cb2025-05-10 17:00:08 -070040func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000041 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
42 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee2e463fb2025-04-17 11:22:22 -070043 if err := r.RequireNormalGitState(ctx); err != nil {
44 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070045 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070046 }
47 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
48 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070049 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070050 }
51
52 // Check that the current commit is not the initial commit
53 currentCommit, err := r.CurrentCommit(ctx)
54 if err != nil {
55 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070056 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070057 }
58 if r.IsInitialCommit(currentCommit) {
59 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Philip Zeyliger72252cb2025-05-10 17:00:08 -070060 return nil, fmt.Errorf("no new commits have been added, nothing to review")
Earl Lee2e463fb2025-04-17 11:22:22 -070061 }
62
63 // No matter what failures happen from here out, we will declare this to have been reviewed.
64 // This should help avoid the model getting blocked by a broken code review tool.
65 r.reviewed = append(r.reviewed, currentCommit)
66
Philip Zeyliger49edc922025-05-14 09:45:45 -070067 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -070068 if err != nil {
69 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070070 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070071 }
72
73 // Prepare to analyze before/after for the impacted files.
74 // We use the current commit to determine what packages exist and are impacted.
75 // The packages in the initial commit may be different.
76 // Good enough for now.
77 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000078 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070079 if err != nil {
80 // TODO: log and skip to stuff that doesn't require packages
81 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070082 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070083 }
84 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070085
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000086 var errorMessages []string // problems we want the model to address
87 var infoMessages []string // info the model should consider
88
89 // Find potentially related files that should also be considered
90 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
91 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
92 if err != nil {
93 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
94 } else {
95 relatedMsg := r.formatRelatedFiles(relatedFiles)
96 if relatedMsg != "" {
97 infoMessages = append(infoMessages, relatedMsg)
98 }
99 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700100
101 testMsg, err := r.checkTests(ctx, allPkgList)
102 if err != nil {
103 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700104 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700105 }
106 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000107 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700108 }
109
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000110 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700111 if err != nil {
112 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700113 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700114 }
115 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000116 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 }
118
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -0700119 if r.llmReview {
120 llmComments, err := r.doLLMReview(ctx)
121 if err != nil {
122 // Log the error but don't fail the codereview if this check fails
123 slog.WarnContext(ctx, "CodeReviewer.Run: error doing LLM review", "err", err)
124 }
125 if llmComments != "" {
126 infoMessages = append(infoMessages, llmComments)
127 }
128 }
129
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000130 // NOTE: If you change this output format, update the corresponding UI parsing in:
131 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000132 buf := new(strings.Builder)
133 if len(infoMessages) > 0 {
134 buf.WriteString("# Info\n\n")
135 buf.WriteString(strings.Join(infoMessages, "\n\n"))
136 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700137 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000138 if len(errorMessages) > 0 {
139 buf.WriteString("# Errors\n\n")
140 buf.WriteString(strings.Join(errorMessages, "\n\n"))
141 buf.WriteString("\n\nPlease fix before proceeding.\n")
142 }
143 if buf.Len() == 0 {
144 buf.WriteString("OK")
145 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700146 return llm.TextContent(buf.String()), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700147}
148
149func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
150 if r.initialWorktree != "" {
151 return nil
152 }
153 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
154 if err != nil {
155 return err
156 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700157 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700158 worktreeCmd.Dir = r.repoRoot
159 out, err := worktreeCmd.CombinedOutput()
160 if err != nil {
161 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
162 }
163 r.initialWorktree = tmpDir
164 return nil
165}
166
167func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000168 // 'gopls check' covers everything that 'go vet' covers.
169 // Disabling vet here speeds things up, and allows more precise filtering and reporting.
170 goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee2e463fb2025-04-17 11:22:22 -0700171 goTestArgs = append(goTestArgs, pkgList...)
172
173 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
174 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000175 afterTestOut, _ := afterTestCmd.Output()
176 // unfortunately, we can't short-circuit here even if all tests pass,
177 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700178
179 err := r.initializeInitialCommitWorktree(ctx)
180 if err != nil {
181 return "", err
182 }
183
184 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
185 beforeTestCmd.Dir = r.initialWorktree
186 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
187
188 // Parse the jsonl test results
189 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
190 if beforeParseErr != nil {
191 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
192 }
193 afterResults, afterParseErr := parseTestResults(afterTestOut)
194 if afterParseErr != nil {
195 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
196 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700197 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
198 if err != nil {
199 return "", fmt.Errorf("failed to compare test results: %w", err)
200 }
201 // TODO: better output formatting?
202 res := r.formatTestRegressions(testRegressions)
203 return res, nil
204}
205
Earl Lee2e463fb2025-04-17 11:22:22 -0700206// GoplsIssue represents a single issue reported by gopls check
207type GoplsIssue struct {
208 Position string // File position in format "file:line:col-range"
209 Message string // Description of the issue
210}
211
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000212// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
213var goplsIgnore = []string{
214 // these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
215 "ends with redundant newline",
216
217 // as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
218 "SplitSeq",
219}
220
Earl Lee2e463fb2025-04-17 11:22:22 -0700221// checkGopls runs gopls check on the provided files in both the current and initial state,
222// compares the results, and reports any new issues introduced in the current state.
223func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
224 if len(changedFiles) == 0 {
225 return "", nil // no files to check
226 }
227
228 // Filter out non-Go files as gopls only works on Go files
229 // and verify they still exist (not deleted)
230 var goFiles []string
231 for _, file := range changedFiles {
232 if !strings.HasSuffix(file, ".go") {
233 continue // not a Go file
234 }
235
236 // Check if the file still exists (not deleted)
237 if _, err := os.Stat(file); os.IsNotExist(err) {
238 continue // file doesn't exist anymore (deleted)
239 }
240
241 goFiles = append(goFiles, file)
242 }
243
244 if len(goFiles) == 0 {
245 return "", nil // no Go files to check
246 }
247
248 // Run gopls check on the current state
249 goplsArgs := append([]string{"check"}, goFiles...)
250
251 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
252 afterGoplsCmd.Dir = r.repoRoot
253 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
254 if err != nil {
255 // Check if the output looks like real gopls issues or if it's just error output
256 if !looksLikeGoplsIssues(afterGoplsOut) {
257 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
258 return "", nil // Skip rather than failing the entire code review
259 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700260 }
261
262 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000263 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700264
265 // If no issues were found, we're done
266 if len(afterIssues) == 0 {
267 return "", nil
268 }
269
270 // Gopls detected issues in the current state, check if they existed in the initial state
271 initErr := r.initializeInitialCommitWorktree(ctx)
272 if initErr != nil {
273 return "", err
274 }
275
276 // For each file that exists in the initial commit, run gopls check
277 var initialFilesToCheck []string
278 for _, file := range goFiles {
279 // Get relative path for git operations
280 relFile, err := filepath.Rel(r.repoRoot, file)
281 if err != nil {
282 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
283 continue
284 }
285
286 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700287 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700288 checkCmd.Dir = r.repoRoot
289 if err := checkCmd.Run(); err == nil {
290 // File exists in initial commit
291 initialFilePath := filepath.Join(r.initialWorktree, relFile)
292 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
293 }
294 }
295
296 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000297 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700298 if len(initialFilesToCheck) > 0 {
299 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
300 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
301 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000302 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700303 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
304 // If gopls fails to run properly on the initial commit, log a warning and continue
305 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000306 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700307 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000308 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700309 }
310 }
311
312 // Find new issues that weren't present in the initial state
313 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
314 if len(goplsRegressions) == 0 {
315 return "", nil // no new issues
316 }
317
318 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000319 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700320}
321
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000322// parseGoplsOutput parses the text output from gopls check.
323// It drops any that match the patterns in goplsIgnore.
Earl Lee2e463fb2025-04-17 11:22:22 -0700324// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000325func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700326 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000327 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700328 line = strings.TrimSpace(line)
329 if line == "" {
330 continue
331 }
332
333 // Skip lines that look like error messages rather than gopls issues
334 if strings.HasPrefix(line, "Error:") ||
335 strings.HasPrefix(line, "Failed:") ||
336 strings.HasPrefix(line, "Warning:") ||
337 strings.HasPrefix(line, "gopls:") {
338 continue
339 }
340
341 // Find the first colon that separates the file path from the line number
342 firstColonIdx := strings.Index(line, ":")
343 if firstColonIdx < 0 {
344 continue // Invalid format
345 }
346
347 // Verify the part before the first colon looks like a file path
348 potentialPath := line[:firstColonIdx]
349 if !strings.HasSuffix(potentialPath, ".go") {
350 continue // Not a Go file path
351 }
352
353 // Find the position of the first message separator ': '
354 // This separates the position info from the message
355 messageStart := strings.Index(line, ": ")
356 if messageStart < 0 || messageStart <= firstColonIdx {
357 continue // Invalid format
358 }
359
360 // Extract position and message
361 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000362 rel, err := filepath.Rel(root, position)
363 if err == nil {
364 position = rel
365 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700366 message := line[messageStart+2:] // Skip the ': ' separator
367
368 // Verify position has the expected format (at least 2 colons for line:col)
369 colonCount := strings.Count(position, ":")
370 if colonCount < 2 {
371 continue // Not enough position information
372 }
373
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000374 // Skip diagnostics that match any of our ignored patterns
375 if shouldIgnoreDiagnostic(message) {
376 continue
377 }
378
Earl Lee2e463fb2025-04-17 11:22:22 -0700379 issues = append(issues, GoplsIssue{
380 Position: position,
381 Message: message,
382 })
383 }
384
385 return issues
386}
387
388// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
389// rather than error messages about gopls itself failing
390func looksLikeGoplsIssues(output []byte) bool {
391 // If output is empty, it's not valid issues
392 if len(output) == 0 {
393 return false
394 }
395
396 // Check if output has at least one line that looks like a gopls issue
397 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000398 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700399 line = strings.TrimSpace(line)
400 if line == "" {
401 continue
402 }
403
404 // A gopls issue has at least two colons (file path, line number, column)
405 // and contains a colon followed by a space (separating position from message)
406 colonCount := strings.Count(line, ":")
407 hasSeparator := strings.Contains(line, ": ")
408
409 if colonCount >= 2 && hasSeparator {
410 // Check if it starts with a likely file path (ending in .go)
411 parts := strings.SplitN(line, ":", 2)
412 if strings.HasSuffix(parts[0], ".go") {
413 return true
414 }
415 }
416 }
417 return false
418}
419
420// normalizeGoplsPosition extracts just the file path from a position string
421func normalizeGoplsPosition(position string) string {
422 // Extract just the file path by taking everything before the first colon
423 parts := strings.Split(position, ":")
424 if len(parts) < 1 {
425 return position
426 }
427 return parts[0]
428}
429
430// findGoplsRegressions identifies gopls issues that are new in the after state
431func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
432 var regressions []GoplsIssue
433
434 // Build map of before issues for easier lookup
435 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
436 for _, issue := range before {
437 file := normalizeGoplsPosition(issue.Position)
438 if _, exists := beforeIssueMap[file]; !exists {
439 beforeIssueMap[file] = make(map[string]bool)
440 }
441 // Store both the exact message and the general issue type for fuzzy matching
442 beforeIssueMap[file][issue.Message] = true
443
444 // Extract the general issue type (everything before the first ':' in the message)
445 generalIssue := issue.Message
446 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
447 generalIssue = issue.Message[:colonIdx]
448 }
449 beforeIssueMap[file][generalIssue] = true
450 }
451
452 // Check each after issue to see if it's new
453 for _, afterIssue := range after {
454 file := normalizeGoplsPosition(afterIssue.Position)
455 isNew := true
456
457 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
458 // Check for exact message match
459 if fileIssues[afterIssue.Message] {
460 isNew = false
461 } else {
462 // Check for general issue type match
463 generalIssue := afterIssue.Message
464 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
465 generalIssue = afterIssue.Message[:colonIdx]
466 }
467 if fileIssues[generalIssue] {
468 isNew = false
469 }
470 }
471 }
472
473 if isNew {
474 regressions = append(regressions, afterIssue)
475 }
476 }
477
478 // Sort regressions for deterministic output
479 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
480 return strings.Compare(a.Position, b.Position)
481 })
482
483 return regressions
484}
485
486// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000487func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700488 if len(regressions) == 0 {
489 return ""
490 }
491
492 var sb strings.Builder
493 sb.WriteString("Gopls check issues detected:\n\n")
494
495 // Format each issue
496 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000497 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700498 }
499
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000500 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
501 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700502 return sb.String()
503}
504
505func (r *CodeReviewer) HasReviewed(commit string) bool {
506 return slices.Contains(r.reviewed, commit)
507}
508
509func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700510 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700511}
512
Philip Zeyliger49edc922025-05-14 09:45:45 -0700513// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000514// This serves two purposes:
515// - ensures we're not still on the initial commit
516// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700517func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000518 head, err := r.CurrentCommit(ctx)
519 if err != nil {
520 return err
521 }
522
523 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700524 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000525 cmd.Dir = r.repoRoot
526 err = cmd.Run()
527 if err != nil {
528 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
529 // Exit code 1 means not an ancestor
530 return fmt.Errorf("HEAD is not a descendant of the initial commit")
531 }
532 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
533 }
534 return nil
535}
536
Earl Lee2e463fb2025-04-17 11:22:22 -0700537// packagesForFiles returns maps of packages related to the given files:
538// 1. directPkgs: packages that directly contain the changed files
539// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
540// It may include false positives.
541// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000542func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700543 for _, f := range files {
544 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000545 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700546 }
547 }
548 cfg := &packages.Config{
549 Mode: packages.LoadImports | packages.NeedEmbedFiles,
550 Context: ctx,
551 // Logf: func(msg string, args ...any) {
552 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
553 // },
554 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
555 // We can cross that bridge when we get there.
556 Dir: r.repoRoot,
557 Tests: true,
558 }
559 universe, err := packages.Load(cfg, "./...")
560 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000561 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700562 }
563 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000564 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700565 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700566 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700567 for _, file := range files {
568 if pkgFiles[file] {
569 // prefer test packages, as they contain strictly more files (right?)
570 prev := directPkgs[pkg.PkgPath]
571 if prev == nil || prev.ForTest == "" {
572 directPkgs[pkg.PkgPath] = pkg
573 }
574 }
575 }
576 }
577
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000578 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700579
580 // Add packages that depend on the direct packages
581 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000582 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700583}
584
585// allFiles returns all files that might be referenced by the package.
586// It may contain false positives.
587func allFiles(p *packages.Package) map[string]bool {
588 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000589 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700590 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
591 for _, extra := range add {
592 for _, file := range extra {
593 files[file] = true
594 }
595 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000596 // Add files from testdata directory
597 testdataDir := filepath.Join(p.Dir, "testdata")
598 if _, err := os.Stat(testdataDir); err == nil {
599 fsys := os.DirFS(p.Dir)
600 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
601 if err == nil && !d.IsDir() {
602 files[filepath.Join(p.Dir, path)] = true
603 }
604 return nil
605 })
606 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700607 return files
608}
609
610// addDependentPackages adds to pkgs all packages from universe
611// that directly or indirectly depend on any package already in pkgs.
612func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
613 for {
614 changed := false
615 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000616 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
617 // skip test packages
618 continue
619 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700620 if _, ok := pkgs[p.PkgPath]; ok {
621 // already in pkgs
622 continue
623 }
624 for importPath := range p.Imports {
625 if _, ok := pkgs[importPath]; ok {
626 // imports a package dependent on pkgs, add it
627 pkgs[p.PkgPath] = p
628 changed = true
629 break
630 }
631 }
632 }
633 if !changed {
634 break
635 }
636 }
637}
638
639// testJSON is a union of BuildEvent and TestEvent
640type testJSON struct {
641 // TestEvent only:
642 // The Time field holds the time the event happened. It is conventionally omitted
643 // for cached test results.
644 Time time.Time `json:"Time"`
645 // BuildEvent only:
646 // The ImportPath field gives the package ID of the package being built.
647 // This matches the Package.ImportPath field of go list -json and the
648 // TestEvent.FailedBuild field of go test -json. Note that it does not
649 // match TestEvent.Package.
650 ImportPath string `json:"ImportPath"` // BuildEvent only
651 // TestEvent only:
652 // The Package field, if present, specifies the package being tested. When the
653 // go command runs parallel tests in -json mode, events from different tests are
654 // interlaced; the Package field allows readers to separate them.
655 Package string `json:"Package"`
656 // Action is used in both BuildEvent and TestEvent.
657 // It is the key to distinguishing between them.
658 // BuildEvent:
659 // build-output or build-fail
660 // TestEvent:
661 // start, run, pause, cont, pass, bench, fail, output, skip
662 Action string `json:"Action"`
663 // TestEvent only:
664 // The Test field, if present, specifies the test, example, or benchmark function
665 // that caused the event. Events for the overall package test do not set Test.
666 Test string `json:"Test"`
667 // TestEvent only:
668 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
669 // for the specific test or the overall package test that passed or failed.
670 Elapsed float64
671 // TestEvent:
672 // The Output field is set for Action == "output" and is a portion of the
673 // test's output (standard output and standard error merged together). The
674 // output is unmodified except that invalid UTF-8 output from a test is coerced
675 // into valid UTF-8 by use of replacement characters. With that one exception,
676 // the concatenation of the Output fields of all output events is the exact output
677 // of the test execution.
678 // BuildEvent:
679 // The Output field is set for Action == "build-output" and is a portion of
680 // the build's output. The concatenation of the Output fields of all output
681 // events is the exact output of the build. A single event may contain one
682 // or more lines of output and there may be more than one output event for
683 // a given ImportPath. This matches the definition of the TestEvent.Output
684 // field produced by go test -json.
685 Output string `json:"Output"`
686 // TestEvent only:
687 // The FailedBuild field is set for Action == "fail" if the test failure was caused
688 // by a build failure. It contains the package ID of the package that failed to
689 // build. This matches the ImportPath field of the "go list" output, as well as the
690 // BuildEvent.ImportPath field as emitted by "go build -json".
691 FailedBuild string `json:"FailedBuild"`
692}
693
694// parseTestResults converts test output in JSONL format into a slice of testJSON objects
695func parseTestResults(testOutput []byte) ([]testJSON, error) {
696 var results []testJSON
697 dec := json.NewDecoder(bytes.NewReader(testOutput))
698 for {
699 var event testJSON
700 if err := dec.Decode(&event); err != nil {
701 if err == io.EOF {
702 break
703 }
704 return nil, err
705 }
706 results = append(results, event)
707 }
708 return results, nil
709}
710
711// testStatus represents the status of a test in a given commit
712type testStatus int
713
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700714//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700715const (
716 testStatusUnknown testStatus = iota
717 testStatusPass
718 testStatusFail
719 testStatusBuildFail
720 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000721 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700722)
723
Earl Lee2e463fb2025-04-17 11:22:22 -0700724// testRegression represents a test that regressed between commits
725type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000726 Package string
727 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700728 BeforeStatus testStatus
729 AfterStatus testStatus
730 Output string // failure output in the after state
731}
732
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000733func (r *testRegression) Source() string {
734 if r.Test == "" {
735 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700736 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000737 return fmt.Sprintf("%s.%s", r.Package, r.Test)
738}
Earl Lee2e463fb2025-04-17 11:22:22 -0700739
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000740type packageResult struct {
741 Status testStatus // overall status for the package
742 TestStatus map[string]testStatus // name -> status
743 TestOutput map[string][]string // name -> output parts
744}
Earl Lee2e463fb2025-04-17 11:22:22 -0700745
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000746// collectTestStatuses processes a slice of test events and returns rich status information
747func collectTestStatuses(results []testJSON) map[string]*packageResult {
748 m := make(map[string]*packageResult)
749
750 for _, event := range results {
751 pkg := event.Package
752 p, ok := m[pkg]
753 if !ok {
754 p = new(packageResult)
755 p.TestStatus = make(map[string]testStatus)
756 p.TestOutput = make(map[string][]string)
757 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700758 }
759
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000760 switch event.Action {
761 case "output":
762 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700763 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700764 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000765 if event.Test == "" {
766 p.Status = testStatusPass
767 } else {
768 p.TestStatus[event.Test] = testStatusPass
769 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700770 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000771 if event.Test == "" {
772 if event.FailedBuild != "" {
773 p.Status = testStatusBuildFail
774 } else {
775 p.Status = testStatusFail
776 }
777 } else {
778 p.TestStatus[event.Test] = testStatusFail
779 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700780 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000781 if event.Test == "" {
782 p.Status = testStatusNoTests
783 } else {
784 p.TestStatus[event.Test] = testStatusSkip
785 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700786 }
787 }
788
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000789 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700790}
791
792// compareTestResults identifies tests that have regressed between commits
793func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000794 before := collectTestStatuses(beforeResults)
795 after := collectTestStatuses(afterResults)
796 var testLevelRegressions []testRegression
797 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700798
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000799 afterPkgs := slices.Sorted(maps.Keys(after))
800 for _, pkg := range afterPkgs {
801 afterResult := after[pkg]
802 afterStatus := afterResult.Status
803 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
804 beforeResult, ok := before[pkg]
805 beforeStatus := testStatusNoTests
806 if ok {
807 beforeStatus = beforeResult.Status
808 }
809 // If things no longer build, stop at the package level.
810 // Otherwise, proceed to the test level, so we have more precise information.
811 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
812 packageLevelRegressions = append(packageLevelRegressions, testRegression{
813 Package: pkg,
814 BeforeStatus: beforeStatus,
815 AfterStatus: afterStatus,
816 })
817 continue
818 }
819 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
820 for _, test := range tests {
821 afterStatus := afterResult.TestStatus[test]
822 switch afterStatus {
823 case testStatusPass:
824 continue
825 case testStatusUnknown:
826 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
827 continue
828 }
829 beforeStatus := beforeResult.TestStatus[test]
830 if isRegression(beforeStatus, afterStatus) {
831 testLevelRegressions = append(testLevelRegressions, testRegression{
832 Package: pkg,
833 Test: test,
834 BeforeStatus: beforeStatus,
835 AfterStatus: afterStatus,
836 Output: strings.Join(afterResult.TestOutput[test], ""),
837 })
838 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700839 }
840 }
841
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000842 // If we have test-level regressions, report only those
843 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700844 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000845 if len(testLevelRegressions) > 0 {
846 regressions = testLevelRegressions
847 } else {
848 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700849 }
850
851 // Sort regressions for consistent output
852 slices.SortFunc(regressions, func(a, b testRegression) int {
853 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000854 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700855 return c
856 }
857 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000858 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700859 })
860
861 return regressions, nil
862}
863
864// badnessLevels maps test status to a badness level
865// Higher values indicate worse status (more severe issues)
866var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000867 testStatusBuildFail: 5, // Worst
868 testStatusFail: 4,
869 testStatusSkip: 3,
870 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700871 testStatusPass: 1,
872 testStatusUnknown: 0, // Least bad - avoids false positives
873}
874
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000875// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
876type regressionKey struct {
877 before, after testStatus
878}
879
880var regressionMessages = map[regressionKey]string{
881 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
882 {testStatusUnknown, testStatusFail}: "New test is failing",
883 {testStatusUnknown, testStatusSkip}: "New test is skipped",
884 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
885 {testStatusPass, testStatusFail}: "Was passing, now failing",
886 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
887 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
888 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
889 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
890 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
891 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
892 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
893}
894
Earl Lee2e463fb2025-04-17 11:22:22 -0700895// isRegression determines if a test has regressed based on before and after status
896// A regression is defined as an increase in badness level
897func isRegression(before, after testStatus) bool {
898 // Higher badness level means worse status
899 return badnessLevels[after] > badnessLevels[before]
900}
901
902// formatTestRegressions generates a human-readable summary of test regressions
903func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
904 if len(regressions) == 0 {
905 return ""
906 }
907
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000908 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700909 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700910
911 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000912 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
913 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
914 message, exists := regressionMessages[key]
915 if !exists {
916 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700917 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000918 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700919 }
920
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000921 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700922}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000923
924// RelatedFile represents a file historically related to the changed files
925type RelatedFile struct {
926 Path string // Path to the file
927 Correlation float64 // Correlation score (0.0-1.0)
928}
929
930// findRelatedFiles identifies files that are historically related to the changed files
931// by analyzing git commit history for co-occurrences.
932func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
933 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
934 if err != nil {
935 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
936 }
937 if len(commits) == 0 {
938 return nil, nil
939 }
940
941 relChanged := make(map[string]bool, len(changedFiles))
942 for _, file := range changedFiles {
943 rel, err := filepath.Rel(r.repoRoot, file)
944 if err != nil {
945 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
946 }
947 relChanged[rel] = true
948 }
949
950 historyFiles := make(map[string]int)
951 var historyMu sync.Mutex
952
953 maxWorkers := runtime.GOMAXPROCS(0)
954 semaphore := make(chan bool, maxWorkers)
955 var wg sync.WaitGroup
956
957 for _, commit := range commits {
958 wg.Add(1)
959 semaphore <- true // acquire
960
961 go func(commit string) {
962 defer wg.Done()
963 defer func() { <-semaphore }() // release
964 commitFiles, err := r.getFilesInCommit(ctx, commit)
965 if err != nil {
966 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
967 return
968 }
969 incr := 0
970 for _, file := range commitFiles {
971 if relChanged[file] {
972 incr++
973 }
974 }
975 if incr == 0 {
976 return
977 }
978 historyMu.Lock()
979 defer historyMu.Unlock()
980 for _, file := range commitFiles {
981 historyFiles[file] += incr
982 }
983 }(commit)
984 }
985 wg.Wait()
986
987 // normalize
988 maxCount := 0
989 for _, count := range historyFiles {
990 maxCount = max(maxCount, count)
991 }
992 if maxCount == 0 {
993 return nil, nil
994 }
995
996 var relatedFiles []RelatedFile
997 for file, count := range historyFiles {
998 if relChanged[file] {
999 // Don't include inputs in the output.
1000 continue
1001 }
1002 correlation := float64(count) / float64(maxCount)
1003 // Require min correlation to avoid noise
1004 if correlation >= 0.1 {
Pokey Rule75f93a12025-05-15 13:51:55 +00001005 // Check if the file still exists in the repository
1006 fullPath := filepath.Join(r.repoRoot, file)
1007 if _, err := os.Stat(fullPath); err == nil {
1008 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
1009 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001010 }
1011 }
1012
1013 // Highest correlation first
1014 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
1015 return -1 * cmp.Compare(a.Correlation, b.Correlation)
1016 })
1017
1018 // Limit to 1 correlated file per input file.
1019 // (Arbitrary limit, to be adjusted.)
1020 maxFiles := len(changedFiles)
1021 if len(relatedFiles) > maxFiles {
1022 relatedFiles = relatedFiles[:maxFiles]
1023 }
1024
1025 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1026 // and then increase the strength of the wording in the relatedFiles message.
1027
1028 return relatedFiles, nil
1029}
1030
1031// getCommitsTouchingFiles returns all commits that touch any of the specified files
1032func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1033 if len(files) == 0 {
1034 return nil, nil
1035 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001036 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001037 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1038 cmd.Dir = r.repoRoot
1039 out, err := cmd.CombinedOutput()
1040 if err != nil {
1041 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1042 }
1043 return nonEmptyTrimmedLines(out), nil
1044}
1045
1046// getFilesInCommit returns all files changed in a specific commit
1047func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1048 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1049 cmd.Dir = r.repoRoot
1050 out, err := cmd.CombinedOutput()
1051 if err != nil {
1052 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1053 }
1054 return nonEmptyTrimmedLines(out), nil
1055}
1056
1057func nonEmptyTrimmedLines(b []byte) []string {
1058 var lines []string
1059 for line := range strings.Lines(string(b)) {
1060 line = strings.TrimSpace(line)
1061 if line != "" {
1062 lines = append(lines, line)
1063 }
1064 }
1065 return lines
1066}
1067
1068// formatRelatedFiles formats the related files list into a human-readable message
1069func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1070 if len(files) == 0 {
1071 return ""
1072 }
1073
1074 buf := new(strings.Builder)
1075
1076 fmt.Fprintf(buf, "Potentially related files:\n\n")
1077
1078 for _, file := range files {
1079 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1080 }
1081
1082 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1083 return buf.String()
1084}
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +00001085
1086// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
1087func shouldIgnoreDiagnostic(message string) bool {
1088 for _, pattern := range goplsIgnore {
1089 if strings.Contains(message, pattern) {
1090 return true
1091 }
1092 }
1093 return false
1094}