blob: d13cd9e1a701e16417ff175c3d9dd18d61e9b040 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070024)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070029func (r *CodeReviewer) Tool() *llm.Tool {
30 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070031 Name: "codereview",
Josh Bleecher Snyder272c4a92025-06-04 16:08:42 -070032 Description: `Run an automated code review before presenting git commits to the user. Call if/when you've completed your current work and are ready for user feedback.`,
Earl Lee2e463fb2025-04-17 11:22:22 -070033 // If you modify this, update the termui template for prettier rendering.
philip.zeyliger57162e02025-06-18 09:59:48 -070034 InputSchema: json.RawMessage(`{
35 "type": "object",
36 "properties": {
37 "timeout": {
38 "type": "string",
39 "description": "Timeout as a Go duration string (default: 1m)",
40 "default": "1m"
41 }
42 }
43 }`),
44 Run: r.Run,
Earl Lee2e463fb2025-04-17 11:22:22 -070045 }
46 return spec
47}
48
Philip Zeyliger72252cb2025-05-10 17:00:08 -070049func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
philip.zeyliger57162e02025-06-18 09:59:48 -070050 // Parse input to get timeout
51 var input struct {
52 Timeout string `json:"timeout"`
53 }
54 if len(m) > 0 {
55 if err := json.Unmarshal(m, &input); err != nil {
56 return nil, fmt.Errorf("failed to parse input: %w", err)
57 }
58 }
59 if input.Timeout == "" {
60 input.Timeout = "1m" // default timeout
61 }
62
63 // Parse timeout duration
64 timeout, err := time.ParseDuration(input.Timeout)
65 if err != nil {
66 return nil, fmt.Errorf("invalid timeout duration %q: %w", input.Timeout, err)
67 }
68
69 // Create timeout context
70 timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
71 defer cancel()
72
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000073 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
74 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
philip.zeyliger57162e02025-06-18 09:59:48 -070075 if err := r.RequireNormalGitState(timeoutCtx); err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -070076 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070077 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070078 }
philip.zeyliger57162e02025-06-18 09:59:48 -070079 if err := r.RequireNoUncommittedChanges(timeoutCtx); err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -070080 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070081 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070082 }
83
84 // Check that the current commit is not the initial commit
philip.zeyliger57162e02025-06-18 09:59:48 -070085 currentCommit, err := r.CurrentCommit(timeoutCtx)
Earl Lee2e463fb2025-04-17 11:22:22 -070086 if err != nil {
87 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070088 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070089 }
90 if r.IsInitialCommit(currentCommit) {
91 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Philip Zeyliger72252cb2025-05-10 17:00:08 -070092 return nil, fmt.Errorf("no new commits have been added, nothing to review")
Earl Lee2e463fb2025-04-17 11:22:22 -070093 }
94
95 // No matter what failures happen from here out, we will declare this to have been reviewed.
96 // This should help avoid the model getting blocked by a broken code review tool.
97 r.reviewed = append(r.reviewed, currentCommit)
98
philip.zeyliger57162e02025-06-18 09:59:48 -070099 changedFiles, err := r.changedFiles(timeoutCtx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -0700100 if err != nil {
101 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700102 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700103 }
104
105 // Prepare to analyze before/after for the impacted files.
106 // We use the current commit to determine what packages exist and are impacted.
107 // The packages in the initial commit may be different.
108 // Good enough for now.
109 // TODO: do better
philip.zeyliger57162e02025-06-18 09:59:48 -0700110 allPkgs, err := r.packagesForFiles(timeoutCtx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -0700111 if err != nil {
112 // TODO: log and skip to stuff that doesn't require packages
113 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700114 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700115 }
116 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -0700117
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000118 var errorMessages []string // problems we want the model to address
119 var infoMessages []string // info the model should consider
120
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000121 // Run 'go generate' early, so that it can potentially fix tests that would otherwise fail.
philip.zeyliger57162e02025-06-18 09:59:48 -0700122 generateChanges, err := r.runGenerate(timeoutCtx, allPkgList)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000123 if err != nil {
124 errorMessages = append(errorMessages, err.Error())
125 }
126 if len(generateChanges) > 0 {
127 buf := new(strings.Builder)
128 buf.WriteString("The following files were changed by running `go generate`:\n\n")
129 for _, f := range generateChanges {
130 buf.WriteString(f)
131 buf.WriteString("\n")
132 }
133 buf.WriteString("\nPlease amend your latest git commit with these changes.\n")
134 infoMessages = append(infoMessages, buf.String())
135 }
136
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000137 // Find potentially related files that should also be considered
138 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000139 // TODO: arrange for this to run even in non-Go repos!
philip.zeyliger57162e02025-06-18 09:59:48 -0700140 relatedFiles, err := r.findRelatedFiles(timeoutCtx, changedFiles)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000141 if err != nil {
142 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
143 } else {
144 relatedMsg := r.formatRelatedFiles(relatedFiles)
145 if relatedMsg != "" {
146 infoMessages = append(infoMessages, relatedMsg)
147 }
148 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700149
philip.zeyliger57162e02025-06-18 09:59:48 -0700150 testMsg, err := r.checkTests(timeoutCtx, allPkgList)
Earl Lee2e463fb2025-04-17 11:22:22 -0700151 if err != nil {
152 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700153 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700154 }
155 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000156 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700157 }
158
philip.zeyliger57162e02025-06-18 09:59:48 -0700159 goplsMsg, err := r.checkGopls(timeoutCtx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700160 if err != nil {
161 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700162 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700163 }
164 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000165 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700166 }
167
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000168 // NOTE: If you change this output format, update the corresponding UI parsing in:
169 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000170 buf := new(strings.Builder)
171 if len(infoMessages) > 0 {
172 buf.WriteString("# Info\n\n")
173 buf.WriteString(strings.Join(infoMessages, "\n\n"))
174 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700175 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000176 if len(errorMessages) > 0 {
177 buf.WriteString("# Errors\n\n")
178 buf.WriteString(strings.Join(errorMessages, "\n\n"))
179 buf.WriteString("\n\nPlease fix before proceeding.\n")
180 }
181 if buf.Len() == 0 {
182 buf.WriteString("OK")
183 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700184 return llm.TextContent(buf.String()), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700185}
186
187func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
188 if r.initialWorktree != "" {
189 return nil
190 }
191 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
192 if err != nil {
193 return err
194 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700195 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700196 worktreeCmd.Dir = r.repoRoot
197 out, err := worktreeCmd.CombinedOutput()
198 if err != nil {
199 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
200 }
201 r.initialWorktree = tmpDir
202 return nil
203}
204
205func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000206 // 'gopls check' covers everything that 'go vet' covers.
207 // Disabling vet here speeds things up, and allows more precise filtering and reporting.
208 goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee2e463fb2025-04-17 11:22:22 -0700209 goTestArgs = append(goTestArgs, pkgList...)
210
211 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
212 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000213 afterTestOut, _ := afterTestCmd.Output()
214 // unfortunately, we can't short-circuit here even if all tests pass,
215 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700216
217 err := r.initializeInitialCommitWorktree(ctx)
218 if err != nil {
219 return "", err
220 }
221
222 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
223 beforeTestCmd.Dir = r.initialWorktree
224 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
225
226 // Parse the jsonl test results
227 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
228 if beforeParseErr != nil {
229 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
230 }
231 afterResults, afterParseErr := parseTestResults(afterTestOut)
232 if afterParseErr != nil {
233 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
234 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700235 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
236 if err != nil {
237 return "", fmt.Errorf("failed to compare test results: %w", err)
238 }
239 // TODO: better output formatting?
240 res := r.formatTestRegressions(testRegressions)
241 return res, nil
242}
243
Earl Lee2e463fb2025-04-17 11:22:22 -0700244// GoplsIssue represents a single issue reported by gopls check
245type GoplsIssue struct {
246 Position string // File position in format "file:line:col-range"
247 Message string // Description of the issue
248}
249
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000250// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
251var goplsIgnore = []string{
252 // these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
253 "ends with redundant newline",
254
255 // as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
256 "SplitSeq",
257}
258
Earl Lee2e463fb2025-04-17 11:22:22 -0700259// checkGopls runs gopls check on the provided files in both the current and initial state,
260// compares the results, and reports any new issues introduced in the current state.
261func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
262 if len(changedFiles) == 0 {
263 return "", nil // no files to check
264 }
265
266 // Filter out non-Go files as gopls only works on Go files
267 // and verify they still exist (not deleted)
268 var goFiles []string
269 for _, file := range changedFiles {
270 if !strings.HasSuffix(file, ".go") {
271 continue // not a Go file
272 }
273
274 // Check if the file still exists (not deleted)
275 if _, err := os.Stat(file); os.IsNotExist(err) {
276 continue // file doesn't exist anymore (deleted)
277 }
278
279 goFiles = append(goFiles, file)
280 }
281
282 if len(goFiles) == 0 {
283 return "", nil // no Go files to check
284 }
285
286 // Run gopls check on the current state
287 goplsArgs := append([]string{"check"}, goFiles...)
288
289 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
290 afterGoplsCmd.Dir = r.repoRoot
291 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
292 if err != nil {
293 // Check if the output looks like real gopls issues or if it's just error output
294 if !looksLikeGoplsIssues(afterGoplsOut) {
295 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
296 return "", nil // Skip rather than failing the entire code review
297 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700298 }
299
300 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000301 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700302
303 // If no issues were found, we're done
304 if len(afterIssues) == 0 {
305 return "", nil
306 }
307
308 // Gopls detected issues in the current state, check if they existed in the initial state
309 initErr := r.initializeInitialCommitWorktree(ctx)
310 if initErr != nil {
311 return "", err
312 }
313
314 // For each file that exists in the initial commit, run gopls check
315 var initialFilesToCheck []string
316 for _, file := range goFiles {
317 // Get relative path for git operations
318 relFile, err := filepath.Rel(r.repoRoot, file)
319 if err != nil {
320 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
321 continue
322 }
323
324 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700325 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700326 checkCmd.Dir = r.repoRoot
327 if err := checkCmd.Run(); err == nil {
328 // File exists in initial commit
329 initialFilePath := filepath.Join(r.initialWorktree, relFile)
330 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
331 }
332 }
333
334 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000335 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700336 if len(initialFilesToCheck) > 0 {
337 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
338 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
339 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000340 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700341 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
342 // If gopls fails to run properly on the initial commit, log a warning and continue
343 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000344 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700345 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000346 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700347 }
348 }
349
350 // Find new issues that weren't present in the initial state
351 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
352 if len(goplsRegressions) == 0 {
353 return "", nil // no new issues
354 }
355
356 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000357 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700358}
359
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000360// parseGoplsOutput parses the text output from gopls check.
361// It drops any that match the patterns in goplsIgnore.
Earl Lee2e463fb2025-04-17 11:22:22 -0700362// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000363func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700364 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000365 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700366 line = strings.TrimSpace(line)
367 if line == "" {
368 continue
369 }
370
371 // Skip lines that look like error messages rather than gopls issues
372 if strings.HasPrefix(line, "Error:") ||
373 strings.HasPrefix(line, "Failed:") ||
374 strings.HasPrefix(line, "Warning:") ||
375 strings.HasPrefix(line, "gopls:") {
376 continue
377 }
378
379 // Find the first colon that separates the file path from the line number
380 firstColonIdx := strings.Index(line, ":")
381 if firstColonIdx < 0 {
382 continue // Invalid format
383 }
384
385 // Verify the part before the first colon looks like a file path
386 potentialPath := line[:firstColonIdx]
387 if !strings.HasSuffix(potentialPath, ".go") {
388 continue // Not a Go file path
389 }
390
391 // Find the position of the first message separator ': '
392 // This separates the position info from the message
393 messageStart := strings.Index(line, ": ")
394 if messageStart < 0 || messageStart <= firstColonIdx {
395 continue // Invalid format
396 }
397
398 // Extract position and message
399 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000400 rel, err := filepath.Rel(root, position)
401 if err == nil {
402 position = rel
403 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700404 message := line[messageStart+2:] // Skip the ': ' separator
405
406 // Verify position has the expected format (at least 2 colons for line:col)
407 colonCount := strings.Count(position, ":")
408 if colonCount < 2 {
409 continue // Not enough position information
410 }
411
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000412 // Skip diagnostics that match any of our ignored patterns
413 if shouldIgnoreDiagnostic(message) {
414 continue
415 }
416
Earl Lee2e463fb2025-04-17 11:22:22 -0700417 issues = append(issues, GoplsIssue{
418 Position: position,
419 Message: message,
420 })
421 }
422
423 return issues
424}
425
426// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
427// rather than error messages about gopls itself failing
428func looksLikeGoplsIssues(output []byte) bool {
429 // If output is empty, it's not valid issues
430 if len(output) == 0 {
431 return false
432 }
433
434 // Check if output has at least one line that looks like a gopls issue
435 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000436 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700437 line = strings.TrimSpace(line)
438 if line == "" {
439 continue
440 }
441
442 // A gopls issue has at least two colons (file path, line number, column)
443 // and contains a colon followed by a space (separating position from message)
444 colonCount := strings.Count(line, ":")
445 hasSeparator := strings.Contains(line, ": ")
446
447 if colonCount >= 2 && hasSeparator {
448 // Check if it starts with a likely file path (ending in .go)
449 parts := strings.SplitN(line, ":", 2)
450 if strings.HasSuffix(parts[0], ".go") {
451 return true
452 }
453 }
454 }
455 return false
456}
457
458// normalizeGoplsPosition extracts just the file path from a position string
459func normalizeGoplsPosition(position string) string {
460 // Extract just the file path by taking everything before the first colon
461 parts := strings.Split(position, ":")
462 if len(parts) < 1 {
463 return position
464 }
465 return parts[0]
466}
467
468// findGoplsRegressions identifies gopls issues that are new in the after state
469func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
470 var regressions []GoplsIssue
471
472 // Build map of before issues for easier lookup
473 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
474 for _, issue := range before {
475 file := normalizeGoplsPosition(issue.Position)
476 if _, exists := beforeIssueMap[file]; !exists {
477 beforeIssueMap[file] = make(map[string]bool)
478 }
479 // Store both the exact message and the general issue type for fuzzy matching
480 beforeIssueMap[file][issue.Message] = true
481
482 // Extract the general issue type (everything before the first ':' in the message)
483 generalIssue := issue.Message
484 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
485 generalIssue = issue.Message[:colonIdx]
486 }
487 beforeIssueMap[file][generalIssue] = true
488 }
489
490 // Check each after issue to see if it's new
491 for _, afterIssue := range after {
492 file := normalizeGoplsPosition(afterIssue.Position)
493 isNew := true
494
495 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
496 // Check for exact message match
497 if fileIssues[afterIssue.Message] {
498 isNew = false
499 } else {
500 // Check for general issue type match
501 generalIssue := afterIssue.Message
502 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
503 generalIssue = afterIssue.Message[:colonIdx]
504 }
505 if fileIssues[generalIssue] {
506 isNew = false
507 }
508 }
509 }
510
511 if isNew {
512 regressions = append(regressions, afterIssue)
513 }
514 }
515
516 // Sort regressions for deterministic output
517 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
518 return strings.Compare(a.Position, b.Position)
519 })
520
521 return regressions
522}
523
524// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000525func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700526 if len(regressions) == 0 {
527 return ""
528 }
529
530 var sb strings.Builder
531 sb.WriteString("Gopls check issues detected:\n\n")
532
533 // Format each issue
534 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000535 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700536 }
537
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000538 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
539 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700540 return sb.String()
541}
542
543func (r *CodeReviewer) HasReviewed(commit string) bool {
544 return slices.Contains(r.reviewed, commit)
545}
546
547func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700548 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700549}
550
Philip Zeyliger49edc922025-05-14 09:45:45 -0700551// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000552// This serves two purposes:
553// - ensures we're not still on the initial commit
554// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700555func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000556 head, err := r.CurrentCommit(ctx)
557 if err != nil {
558 return err
559 }
560
561 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700562 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000563 cmd.Dir = r.repoRoot
564 err = cmd.Run()
565 if err != nil {
566 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
567 // Exit code 1 means not an ancestor
568 return fmt.Errorf("HEAD is not a descendant of the initial commit")
569 }
570 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
571 }
572 return nil
573}
574
Earl Lee2e463fb2025-04-17 11:22:22 -0700575// packagesForFiles returns maps of packages related to the given files:
576// 1. directPkgs: packages that directly contain the changed files
577// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
578// It may include false positives.
579// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000580func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700581 for _, f := range files {
582 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000583 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700584 }
585 }
586 cfg := &packages.Config{
587 Mode: packages.LoadImports | packages.NeedEmbedFiles,
588 Context: ctx,
589 // Logf: func(msg string, args ...any) {
590 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
591 // },
592 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
593 // We can cross that bridge when we get there.
594 Dir: r.repoRoot,
595 Tests: true,
596 }
597 universe, err := packages.Load(cfg, "./...")
598 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000599 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700600 }
601 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000602 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700603 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700604 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700605 for _, file := range files {
606 if pkgFiles[file] {
607 // prefer test packages, as they contain strictly more files (right?)
608 prev := directPkgs[pkg.PkgPath]
609 if prev == nil || prev.ForTest == "" {
610 directPkgs[pkg.PkgPath] = pkg
611 }
612 }
613 }
614 }
615
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000616 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700617
618 // Add packages that depend on the direct packages
619 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000620 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700621}
622
623// allFiles returns all files that might be referenced by the package.
624// It may contain false positives.
625func allFiles(p *packages.Package) map[string]bool {
626 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000627 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700628 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
629 for _, extra := range add {
630 for _, file := range extra {
631 files[file] = true
632 }
633 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000634 // Add files from testdata directory
635 testdataDir := filepath.Join(p.Dir, "testdata")
636 if _, err := os.Stat(testdataDir); err == nil {
637 fsys := os.DirFS(p.Dir)
638 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
639 if err == nil && !d.IsDir() {
640 files[filepath.Join(p.Dir, path)] = true
641 }
642 return nil
643 })
644 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700645 return files
646}
647
648// addDependentPackages adds to pkgs all packages from universe
649// that directly or indirectly depend on any package already in pkgs.
650func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
651 for {
652 changed := false
653 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000654 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
655 // skip test packages
656 continue
657 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700658 if _, ok := pkgs[p.PkgPath]; ok {
659 // already in pkgs
660 continue
661 }
662 for importPath := range p.Imports {
663 if _, ok := pkgs[importPath]; ok {
664 // imports a package dependent on pkgs, add it
665 pkgs[p.PkgPath] = p
666 changed = true
667 break
668 }
669 }
670 }
671 if !changed {
672 break
673 }
674 }
675}
676
677// testJSON is a union of BuildEvent and TestEvent
678type testJSON struct {
679 // TestEvent only:
680 // The Time field holds the time the event happened. It is conventionally omitted
681 // for cached test results.
682 Time time.Time `json:"Time"`
683 // BuildEvent only:
684 // The ImportPath field gives the package ID of the package being built.
685 // This matches the Package.ImportPath field of go list -json and the
686 // TestEvent.FailedBuild field of go test -json. Note that it does not
687 // match TestEvent.Package.
688 ImportPath string `json:"ImportPath"` // BuildEvent only
689 // TestEvent only:
690 // The Package field, if present, specifies the package being tested. When the
691 // go command runs parallel tests in -json mode, events from different tests are
692 // interlaced; the Package field allows readers to separate them.
693 Package string `json:"Package"`
694 // Action is used in both BuildEvent and TestEvent.
695 // It is the key to distinguishing between them.
696 // BuildEvent:
697 // build-output or build-fail
698 // TestEvent:
699 // start, run, pause, cont, pass, bench, fail, output, skip
700 Action string `json:"Action"`
701 // TestEvent only:
702 // The Test field, if present, specifies the test, example, or benchmark function
703 // that caused the event. Events for the overall package test do not set Test.
704 Test string `json:"Test"`
705 // TestEvent only:
706 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
707 // for the specific test or the overall package test that passed or failed.
708 Elapsed float64
709 // TestEvent:
710 // The Output field is set for Action == "output" and is a portion of the
711 // test's output (standard output and standard error merged together). The
712 // output is unmodified except that invalid UTF-8 output from a test is coerced
713 // into valid UTF-8 by use of replacement characters. With that one exception,
714 // the concatenation of the Output fields of all output events is the exact output
715 // of the test execution.
716 // BuildEvent:
717 // The Output field is set for Action == "build-output" and is a portion of
718 // the build's output. The concatenation of the Output fields of all output
719 // events is the exact output of the build. A single event may contain one
720 // or more lines of output and there may be more than one output event for
721 // a given ImportPath. This matches the definition of the TestEvent.Output
722 // field produced by go test -json.
723 Output string `json:"Output"`
724 // TestEvent only:
725 // The FailedBuild field is set for Action == "fail" if the test failure was caused
726 // by a build failure. It contains the package ID of the package that failed to
727 // build. This matches the ImportPath field of the "go list" output, as well as the
728 // BuildEvent.ImportPath field as emitted by "go build -json".
729 FailedBuild string `json:"FailedBuild"`
730}
731
732// parseTestResults converts test output in JSONL format into a slice of testJSON objects
733func parseTestResults(testOutput []byte) ([]testJSON, error) {
734 var results []testJSON
735 dec := json.NewDecoder(bytes.NewReader(testOutput))
736 for {
737 var event testJSON
738 if err := dec.Decode(&event); err != nil {
739 if err == io.EOF {
740 break
741 }
742 return nil, err
743 }
744 results = append(results, event)
745 }
746 return results, nil
747}
748
749// testStatus represents the status of a test in a given commit
750type testStatus int
751
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700752//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700753const (
754 testStatusUnknown testStatus = iota
755 testStatusPass
756 testStatusFail
757 testStatusBuildFail
758 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000759 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700760)
761
Earl Lee2e463fb2025-04-17 11:22:22 -0700762// testRegression represents a test that regressed between commits
763type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000764 Package string
765 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700766 BeforeStatus testStatus
767 AfterStatus testStatus
768 Output string // failure output in the after state
769}
770
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000771func (r *testRegression) Source() string {
772 if r.Test == "" {
773 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700774 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000775 return fmt.Sprintf("%s.%s", r.Package, r.Test)
776}
Earl Lee2e463fb2025-04-17 11:22:22 -0700777
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000778type packageResult struct {
779 Status testStatus // overall status for the package
780 TestStatus map[string]testStatus // name -> status
781 TestOutput map[string][]string // name -> output parts
782}
Earl Lee2e463fb2025-04-17 11:22:22 -0700783
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000784// collectTestStatuses processes a slice of test events and returns rich status information
785func collectTestStatuses(results []testJSON) map[string]*packageResult {
786 m := make(map[string]*packageResult)
787
788 for _, event := range results {
789 pkg := event.Package
790 p, ok := m[pkg]
791 if !ok {
792 p = new(packageResult)
793 p.TestStatus = make(map[string]testStatus)
794 p.TestOutput = make(map[string][]string)
795 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700796 }
797
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000798 switch event.Action {
799 case "output":
800 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700801 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700802 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000803 if event.Test == "" {
804 p.Status = testStatusPass
805 } else {
806 p.TestStatus[event.Test] = testStatusPass
807 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700808 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000809 if event.Test == "" {
810 if event.FailedBuild != "" {
811 p.Status = testStatusBuildFail
812 } else {
813 p.Status = testStatusFail
814 }
815 } else {
816 p.TestStatus[event.Test] = testStatusFail
817 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700818 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000819 if event.Test == "" {
820 p.Status = testStatusNoTests
821 } else {
822 p.TestStatus[event.Test] = testStatusSkip
823 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700824 }
825 }
826
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000827 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700828}
829
830// compareTestResults identifies tests that have regressed between commits
831func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000832 before := collectTestStatuses(beforeResults)
833 after := collectTestStatuses(afterResults)
834 var testLevelRegressions []testRegression
835 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700836
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000837 afterPkgs := slices.Sorted(maps.Keys(after))
838 for _, pkg := range afterPkgs {
839 afterResult := after[pkg]
840 afterStatus := afterResult.Status
841 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
842 beforeResult, ok := before[pkg]
843 beforeStatus := testStatusNoTests
844 if ok {
845 beforeStatus = beforeResult.Status
846 }
847 // If things no longer build, stop at the package level.
848 // Otherwise, proceed to the test level, so we have more precise information.
849 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
850 packageLevelRegressions = append(packageLevelRegressions, testRegression{
851 Package: pkg,
852 BeforeStatus: beforeStatus,
853 AfterStatus: afterStatus,
854 })
855 continue
856 }
857 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
858 for _, test := range tests {
859 afterStatus := afterResult.TestStatus[test]
860 switch afterStatus {
861 case testStatusPass:
862 continue
863 case testStatusUnknown:
864 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
865 continue
866 }
867 beforeStatus := beforeResult.TestStatus[test]
868 if isRegression(beforeStatus, afterStatus) {
869 testLevelRegressions = append(testLevelRegressions, testRegression{
870 Package: pkg,
871 Test: test,
872 BeforeStatus: beforeStatus,
873 AfterStatus: afterStatus,
874 Output: strings.Join(afterResult.TestOutput[test], ""),
875 })
876 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700877 }
878 }
879
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000880 // If we have test-level regressions, report only those
881 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700882 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000883 if len(testLevelRegressions) > 0 {
884 regressions = testLevelRegressions
885 } else {
886 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700887 }
888
889 // Sort regressions for consistent output
890 slices.SortFunc(regressions, func(a, b testRegression) int {
891 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000892 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700893 return c
894 }
895 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000896 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700897 })
898
899 return regressions, nil
900}
901
902// badnessLevels maps test status to a badness level
903// Higher values indicate worse status (more severe issues)
904var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000905 testStatusBuildFail: 5, // Worst
906 testStatusFail: 4,
907 testStatusSkip: 3,
908 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700909 testStatusPass: 1,
910 testStatusUnknown: 0, // Least bad - avoids false positives
911}
912
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000913// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
914type regressionKey struct {
915 before, after testStatus
916}
917
918var regressionMessages = map[regressionKey]string{
919 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
920 {testStatusUnknown, testStatusFail}: "New test is failing",
921 {testStatusUnknown, testStatusSkip}: "New test is skipped",
922 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
923 {testStatusPass, testStatusFail}: "Was passing, now failing",
924 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
925 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
926 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
927 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
928 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
929 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
930 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
931}
932
Earl Lee2e463fb2025-04-17 11:22:22 -0700933// isRegression determines if a test has regressed based on before and after status
934// A regression is defined as an increase in badness level
935func isRegression(before, after testStatus) bool {
936 // Higher badness level means worse status
937 return badnessLevels[after] > badnessLevels[before]
938}
939
940// formatTestRegressions generates a human-readable summary of test regressions
941func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
942 if len(regressions) == 0 {
943 return ""
944 }
945
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000946 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700947 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700948
949 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000950 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
951 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
952 message, exists := regressionMessages[key]
953 if !exists {
954 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700955 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000956 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700957 }
958
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000959 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700960}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000961
962// RelatedFile represents a file historically related to the changed files
963type RelatedFile struct {
964 Path string // Path to the file
965 Correlation float64 // Correlation score (0.0-1.0)
966}
967
968// findRelatedFiles identifies files that are historically related to the changed files
969// by analyzing git commit history for co-occurrences.
970func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
971 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
972 if err != nil {
973 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
974 }
975 if len(commits) == 0 {
976 return nil, nil
977 }
978
979 relChanged := make(map[string]bool, len(changedFiles))
980 for _, file := range changedFiles {
981 rel, err := filepath.Rel(r.repoRoot, file)
982 if err != nil {
983 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
984 }
985 relChanged[rel] = true
986 }
987
988 historyFiles := make(map[string]int)
989 var historyMu sync.Mutex
990
991 maxWorkers := runtime.GOMAXPROCS(0)
992 semaphore := make(chan bool, maxWorkers)
993 var wg sync.WaitGroup
994
995 for _, commit := range commits {
996 wg.Add(1)
997 semaphore <- true // acquire
998
999 go func(commit string) {
1000 defer wg.Done()
1001 defer func() { <-semaphore }() // release
1002 commitFiles, err := r.getFilesInCommit(ctx, commit)
1003 if err != nil {
1004 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
1005 return
1006 }
1007 incr := 0
1008 for _, file := range commitFiles {
1009 if relChanged[file] {
1010 incr++
1011 }
1012 }
1013 if incr == 0 {
1014 return
1015 }
1016 historyMu.Lock()
1017 defer historyMu.Unlock()
1018 for _, file := range commitFiles {
1019 historyFiles[file] += incr
1020 }
1021 }(commit)
1022 }
1023 wg.Wait()
1024
1025 // normalize
1026 maxCount := 0
1027 for _, count := range historyFiles {
1028 maxCount = max(maxCount, count)
1029 }
1030 if maxCount == 0 {
1031 return nil, nil
1032 }
1033
1034 var relatedFiles []RelatedFile
1035 for file, count := range historyFiles {
1036 if relChanged[file] {
1037 // Don't include inputs in the output.
1038 continue
1039 }
1040 correlation := float64(count) / float64(maxCount)
1041 // Require min correlation to avoid noise
1042 if correlation >= 0.1 {
Pokey Rule75f93a12025-05-15 13:51:55 +00001043 // Check if the file still exists in the repository
1044 fullPath := filepath.Join(r.repoRoot, file)
1045 if _, err := os.Stat(fullPath); err == nil {
1046 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
1047 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001048 }
1049 }
1050
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001051 // Highest correlation first, then sort by path.
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001052 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001053 return cmp.Or(
1054 -1*cmp.Compare(a.Correlation, b.Correlation),
1055 cmp.Compare(a.Path, b.Path),
1056 )
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001057 })
1058
1059 // Limit to 1 correlated file per input file.
1060 // (Arbitrary limit, to be adjusted.)
1061 maxFiles := len(changedFiles)
1062 if len(relatedFiles) > maxFiles {
1063 relatedFiles = relatedFiles[:maxFiles]
1064 }
1065
1066 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1067 // and then increase the strength of the wording in the relatedFiles message.
1068
1069 return relatedFiles, nil
1070}
1071
1072// getCommitsTouchingFiles returns all commits that touch any of the specified files
1073func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1074 if len(files) == 0 {
1075 return nil, nil
1076 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001077 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001078 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1079 cmd.Dir = r.repoRoot
1080 out, err := cmd.CombinedOutput()
1081 if err != nil {
1082 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1083 }
1084 return nonEmptyTrimmedLines(out), nil
1085}
1086
1087// getFilesInCommit returns all files changed in a specific commit
1088func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1089 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1090 cmd.Dir = r.repoRoot
1091 out, err := cmd.CombinedOutput()
1092 if err != nil {
1093 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1094 }
1095 return nonEmptyTrimmedLines(out), nil
1096}
1097
1098func nonEmptyTrimmedLines(b []byte) []string {
1099 var lines []string
1100 for line := range strings.Lines(string(b)) {
1101 line = strings.TrimSpace(line)
1102 if line != "" {
1103 lines = append(lines, line)
1104 }
1105 }
1106 return lines
1107}
1108
1109// formatRelatedFiles formats the related files list into a human-readable message
1110func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1111 if len(files) == 0 {
1112 return ""
1113 }
1114
1115 buf := new(strings.Builder)
1116
1117 fmt.Fprintf(buf, "Potentially related files:\n\n")
1118
1119 for _, file := range files {
1120 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1121 }
1122
1123 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1124 return buf.String()
1125}
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +00001126
1127// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
1128func shouldIgnoreDiagnostic(message string) bool {
1129 for _, pattern := range goplsIgnore {
1130 if strings.Contains(message, pattern) {
1131 return true
1132 }
1133 }
1134 return false
1135}