blob: 6c29d064234515e95b89d5b716a2b3b973129049 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +00007 "crypto/sha256"
8 "encoding/hex"
Earl Lee2e463fb2025-04-17 11:22:22 -07009 "encoding/json"
10 "fmt"
11 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000012 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070013 "log/slog"
14 "maps"
15 "os"
16 "os/exec"
17 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000018 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070019 "slices"
20 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000021 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070022 "time"
23
24 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070025 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070026)
27
28// This file does differential quality analysis of a commit relative to a base commit.
29
30// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070031func (r *CodeReviewer) Tool() *llm.Tool {
32 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070033 Name: "codereview",
Josh Bleecher Snyder272c4a92025-06-04 16:08:42 -070034 Description: `Run an automated code review before presenting git commits to the user. Call if/when you've completed your current work and are ready for user feedback.`,
Earl Lee2e463fb2025-04-17 11:22:22 -070035 // If you modify this, update the termui template for prettier rendering.
philip.zeyliger57162e02025-06-18 09:59:48 -070036 InputSchema: json.RawMessage(`{
37 "type": "object",
38 "properties": {
39 "timeout": {
40 "type": "string",
41 "description": "Timeout as a Go duration string (default: 1m)",
42 "default": "1m"
43 }
44 }
45 }`),
46 Run: r.Run,
Earl Lee2e463fb2025-04-17 11:22:22 -070047 }
48 return spec
49}
50
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070051func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) llm.ToolOut {
philip.zeyliger57162e02025-06-18 09:59:48 -070052 // Parse input to get timeout
53 var input struct {
54 Timeout string `json:"timeout"`
55 }
56 if len(m) > 0 {
57 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070058 return llm.ErrorfToolOut("failed to parse input: %w", err)
philip.zeyliger57162e02025-06-18 09:59:48 -070059 }
60 }
61 if input.Timeout == "" {
62 input.Timeout = "1m" // default timeout
63 }
64
65 // Parse timeout duration
66 timeout, err := time.ParseDuration(input.Timeout)
67 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070068 return llm.ErrorfToolOut("invalid timeout duration %q: %w", input.Timeout, err)
philip.zeyliger57162e02025-06-18 09:59:48 -070069 }
70
71 // Create timeout context
72 timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
73 defer cancel()
74
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000075 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
76 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
philip.zeyliger57162e02025-06-18 09:59:48 -070077 if err := r.RequireNormalGitState(timeoutCtx); err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -070078 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070079 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -070080 }
philip.zeyliger57162e02025-06-18 09:59:48 -070081 if err := r.RequireNoUncommittedChanges(timeoutCtx); err != nil {
Earl Lee2e463fb2025-04-17 11:22:22 -070082 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070083 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -070084 }
85
86 // Check that the current commit is not the initial commit
philip.zeyliger57162e02025-06-18 09:59:48 -070087 currentCommit, err := r.CurrentCommit(timeoutCtx)
Earl Lee2e463fb2025-04-17 11:22:22 -070088 if err != nil {
89 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070090 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -070091 }
92 if r.IsInitialCommit(currentCommit) {
93 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070094 return llm.ErrorToolOut(fmt.Errorf("no new commits have been added, nothing to review"))
Earl Lee2e463fb2025-04-17 11:22:22 -070095 }
96
97 // No matter what failures happen from here out, we will declare this to have been reviewed.
98 // This should help avoid the model getting blocked by a broken code review tool.
99 r.reviewed = append(r.reviewed, currentCommit)
100
philip.zeyliger57162e02025-06-18 09:59:48 -0700101 changedFiles, err := r.changedFiles(timeoutCtx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -0700102 if err != nil {
103 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700104 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700105 }
106
107 // Prepare to analyze before/after for the impacted files.
108 // We use the current commit to determine what packages exist and are impacted.
109 // The packages in the initial commit may be different.
110 // Good enough for now.
111 // TODO: do better
philip.zeyliger57162e02025-06-18 09:59:48 -0700112 allPkgs, err := r.packagesForFiles(timeoutCtx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -0700113 if err != nil {
114 // TODO: log and skip to stuff that doesn't require packages
115 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700116 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 }
118 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -0700119
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000120 var errorMessages []string // problems we want the model to address
121 var infoMessages []string // info the model should consider
122
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000123 // Run 'go generate' early, so that it can potentially fix tests that would otherwise fail.
philip.zeyliger57162e02025-06-18 09:59:48 -0700124 generateChanges, err := r.runGenerate(timeoutCtx, allPkgList)
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000125 if err != nil {
126 errorMessages = append(errorMessages, err.Error())
127 }
128 if len(generateChanges) > 0 {
129 buf := new(strings.Builder)
130 buf.WriteString("The following files were changed by running `go generate`:\n\n")
131 for _, f := range generateChanges {
132 buf.WriteString(f)
133 buf.WriteString("\n")
134 }
135 buf.WriteString("\nPlease amend your latest git commit with these changes.\n")
136 infoMessages = append(infoMessages, buf.String())
137 }
138
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000139 // Find potentially related files that should also be considered
140 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
Josh Bleecher Snydercf191902025-06-04 18:18:40 +0000141 // TODO: arrange for this to run even in non-Go repos!
philip.zeyliger57162e02025-06-18 09:59:48 -0700142 relatedFiles, err := r.findRelatedFiles(timeoutCtx, changedFiles)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000143 if err != nil {
144 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
145 } else {
146 relatedMsg := r.formatRelatedFiles(relatedFiles)
147 if relatedMsg != "" {
148 infoMessages = append(infoMessages, relatedMsg)
149 }
150 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700151
philip.zeyliger57162e02025-06-18 09:59:48 -0700152 testMsg, err := r.checkTests(timeoutCtx, allPkgList)
Earl Lee2e463fb2025-04-17 11:22:22 -0700153 if err != nil {
154 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700155 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700156 }
157 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000158 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700159 }
160
philip.zeyliger57162e02025-06-18 09:59:48 -0700161 goplsMsg, err := r.checkGopls(timeoutCtx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700162 if err != nil {
163 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700164 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700165 }
166 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000167 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700168 }
169
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000170 // NOTE: If you change this output format, update the corresponding UI parsing in:
171 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000172 buf := new(strings.Builder)
173 if len(infoMessages) > 0 {
174 buf.WriteString("# Info\n\n")
175 buf.WriteString(strings.Join(infoMessages, "\n\n"))
176 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700177 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000178 if len(errorMessages) > 0 {
179 buf.WriteString("# Errors\n\n")
180 buf.WriteString(strings.Join(errorMessages, "\n\n"))
181 buf.WriteString("\n\nPlease fix before proceeding.\n")
182 }
183 if buf.Len() == 0 {
184 buf.WriteString("OK")
185 }
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700186 return llm.ToolOut{LLMContent: llm.TextContent(buf.String())}
Earl Lee2e463fb2025-04-17 11:22:22 -0700187}
188
189func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
190 if r.initialWorktree != "" {
191 return nil
192 }
193 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
194 if err != nil {
195 return err
196 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700197 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700198 worktreeCmd.Dir = r.repoRoot
199 out, err := worktreeCmd.CombinedOutput()
200 if err != nil {
201 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
202 }
203 r.initialWorktree = tmpDir
204 return nil
205}
206
207func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000208 // 'gopls check' covers everything that 'go vet' covers.
209 // Disabling vet here speeds things up, and allows more precise filtering and reporting.
210 goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee2e463fb2025-04-17 11:22:22 -0700211 goTestArgs = append(goTestArgs, pkgList...)
212
213 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
214 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000215 afterTestOut, _ := afterTestCmd.Output()
216 // unfortunately, we can't short-circuit here even if all tests pass,
217 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700218
219 err := r.initializeInitialCommitWorktree(ctx)
220 if err != nil {
221 return "", err
222 }
223
224 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
225 beforeTestCmd.Dir = r.initialWorktree
226 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
227
228 // Parse the jsonl test results
229 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
230 if beforeParseErr != nil {
231 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
232 }
233 afterResults, afterParseErr := parseTestResults(afterTestOut)
234 if afterParseErr != nil {
235 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
236 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700237 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
238 if err != nil {
239 return "", fmt.Errorf("failed to compare test results: %w", err)
240 }
241 // TODO: better output formatting?
242 res := r.formatTestRegressions(testRegressions)
243 return res, nil
244}
245
Earl Lee2e463fb2025-04-17 11:22:22 -0700246// GoplsIssue represents a single issue reported by gopls check
247type GoplsIssue struct {
248 Position string // File position in format "file:line:col-range"
249 Message string // Description of the issue
250}
251
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000252// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
253var goplsIgnore = []string{
254 // these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
255 "ends with redundant newline",
256
257 // as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
258 "SplitSeq",
259}
260
Earl Lee2e463fb2025-04-17 11:22:22 -0700261// checkGopls runs gopls check on the provided files in both the current and initial state,
262// compares the results, and reports any new issues introduced in the current state.
263func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
264 if len(changedFiles) == 0 {
265 return "", nil // no files to check
266 }
267
268 // Filter out non-Go files as gopls only works on Go files
269 // and verify they still exist (not deleted)
270 var goFiles []string
271 for _, file := range changedFiles {
272 if !strings.HasSuffix(file, ".go") {
273 continue // not a Go file
274 }
275
276 // Check if the file still exists (not deleted)
277 if _, err := os.Stat(file); os.IsNotExist(err) {
278 continue // file doesn't exist anymore (deleted)
279 }
280
281 goFiles = append(goFiles, file)
282 }
283
284 if len(goFiles) == 0 {
285 return "", nil // no Go files to check
286 }
287
288 // Run gopls check on the current state
289 goplsArgs := append([]string{"check"}, goFiles...)
290
291 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
292 afterGoplsCmd.Dir = r.repoRoot
293 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
294 if err != nil {
295 // Check if the output looks like real gopls issues or if it's just error output
296 if !looksLikeGoplsIssues(afterGoplsOut) {
297 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
298 return "", nil // Skip rather than failing the entire code review
299 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700300 }
301
302 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000303 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700304
305 // If no issues were found, we're done
306 if len(afterIssues) == 0 {
307 return "", nil
308 }
309
310 // Gopls detected issues in the current state, check if they existed in the initial state
311 initErr := r.initializeInitialCommitWorktree(ctx)
312 if initErr != nil {
313 return "", err
314 }
315
316 // For each file that exists in the initial commit, run gopls check
317 var initialFilesToCheck []string
318 for _, file := range goFiles {
319 // Get relative path for git operations
320 relFile, err := filepath.Rel(r.repoRoot, file)
321 if err != nil {
322 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
323 continue
324 }
325
326 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700327 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700328 checkCmd.Dir = r.repoRoot
329 if err := checkCmd.Run(); err == nil {
330 // File exists in initial commit
331 initialFilePath := filepath.Join(r.initialWorktree, relFile)
332 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
333 }
334 }
335
336 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000337 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700338 if len(initialFilesToCheck) > 0 {
339 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
340 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
341 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000342 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700343 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
344 // If gopls fails to run properly on the initial commit, log a warning and continue
345 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000346 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700347 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000348 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700349 }
350 }
351
352 // Find new issues that weren't present in the initial state
353 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
354 if len(goplsRegressions) == 0 {
355 return "", nil // no new issues
356 }
357
358 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000359 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700360}
361
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000362// parseGoplsOutput parses the text output from gopls check.
363// It drops any that match the patterns in goplsIgnore.
Earl Lee2e463fb2025-04-17 11:22:22 -0700364// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000365func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700366 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000367 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700368 line = strings.TrimSpace(line)
369 if line == "" {
370 continue
371 }
372
373 // Skip lines that look like error messages rather than gopls issues
374 if strings.HasPrefix(line, "Error:") ||
375 strings.HasPrefix(line, "Failed:") ||
376 strings.HasPrefix(line, "Warning:") ||
377 strings.HasPrefix(line, "gopls:") {
378 continue
379 }
380
381 // Find the first colon that separates the file path from the line number
382 firstColonIdx := strings.Index(line, ":")
383 if firstColonIdx < 0 {
384 continue // Invalid format
385 }
386
387 // Verify the part before the first colon looks like a file path
388 potentialPath := line[:firstColonIdx]
389 if !strings.HasSuffix(potentialPath, ".go") {
390 continue // Not a Go file path
391 }
392
393 // Find the position of the first message separator ': '
394 // This separates the position info from the message
395 messageStart := strings.Index(line, ": ")
396 if messageStart < 0 || messageStart <= firstColonIdx {
397 continue // Invalid format
398 }
399
400 // Extract position and message
401 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000402 rel, err := filepath.Rel(root, position)
403 if err == nil {
404 position = rel
405 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700406 message := line[messageStart+2:] // Skip the ': ' separator
407
408 // Verify position has the expected format (at least 2 colons for line:col)
409 colonCount := strings.Count(position, ":")
410 if colonCount < 2 {
411 continue // Not enough position information
412 }
413
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +0000414 // Skip diagnostics that match any of our ignored patterns
415 if shouldIgnoreDiagnostic(message) {
416 continue
417 }
418
Earl Lee2e463fb2025-04-17 11:22:22 -0700419 issues = append(issues, GoplsIssue{
420 Position: position,
421 Message: message,
422 })
423 }
424
425 return issues
426}
427
428// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
429// rather than error messages about gopls itself failing
430func looksLikeGoplsIssues(output []byte) bool {
431 // If output is empty, it's not valid issues
432 if len(output) == 0 {
433 return false
434 }
435
436 // Check if output has at least one line that looks like a gopls issue
437 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000438 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700439 line = strings.TrimSpace(line)
440 if line == "" {
441 continue
442 }
443
444 // A gopls issue has at least two colons (file path, line number, column)
445 // and contains a colon followed by a space (separating position from message)
446 colonCount := strings.Count(line, ":")
447 hasSeparator := strings.Contains(line, ": ")
448
449 if colonCount >= 2 && hasSeparator {
450 // Check if it starts with a likely file path (ending in .go)
451 parts := strings.SplitN(line, ":", 2)
452 if strings.HasSuffix(parts[0], ".go") {
453 return true
454 }
455 }
456 }
457 return false
458}
459
460// normalizeGoplsPosition extracts just the file path from a position string
461func normalizeGoplsPosition(position string) string {
462 // Extract just the file path by taking everything before the first colon
463 parts := strings.Split(position, ":")
464 if len(parts) < 1 {
465 return position
466 }
467 return parts[0]
468}
469
470// findGoplsRegressions identifies gopls issues that are new in the after state
471func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
472 var regressions []GoplsIssue
473
474 // Build map of before issues for easier lookup
475 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
476 for _, issue := range before {
477 file := normalizeGoplsPosition(issue.Position)
478 if _, exists := beforeIssueMap[file]; !exists {
479 beforeIssueMap[file] = make(map[string]bool)
480 }
481 // Store both the exact message and the general issue type for fuzzy matching
482 beforeIssueMap[file][issue.Message] = true
483
484 // Extract the general issue type (everything before the first ':' in the message)
485 generalIssue := issue.Message
486 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
487 generalIssue = issue.Message[:colonIdx]
488 }
489 beforeIssueMap[file][generalIssue] = true
490 }
491
492 // Check each after issue to see if it's new
493 for _, afterIssue := range after {
494 file := normalizeGoplsPosition(afterIssue.Position)
495 isNew := true
496
497 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
498 // Check for exact message match
499 if fileIssues[afterIssue.Message] {
500 isNew = false
501 } else {
502 // Check for general issue type match
503 generalIssue := afterIssue.Message
504 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
505 generalIssue = afterIssue.Message[:colonIdx]
506 }
507 if fileIssues[generalIssue] {
508 isNew = false
509 }
510 }
511 }
512
513 if isNew {
514 regressions = append(regressions, afterIssue)
515 }
516 }
517
518 // Sort regressions for deterministic output
519 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
520 return strings.Compare(a.Position, b.Position)
521 })
522
523 return regressions
524}
525
526// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000527func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700528 if len(regressions) == 0 {
529 return ""
530 }
531
532 var sb strings.Builder
533 sb.WriteString("Gopls check issues detected:\n\n")
534
535 // Format each issue
536 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000537 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700538 }
539
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000540 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
541 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700542 return sb.String()
543}
544
545func (r *CodeReviewer) HasReviewed(commit string) bool {
546 return slices.Contains(r.reviewed, commit)
547}
548
549func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700550 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700551}
552
Philip Zeyliger49edc922025-05-14 09:45:45 -0700553// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000554// This serves two purposes:
555// - ensures we're not still on the initial commit
556// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700557func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000558 head, err := r.CurrentCommit(ctx)
559 if err != nil {
560 return err
561 }
562
563 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700564 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000565 cmd.Dir = r.repoRoot
566 err = cmd.Run()
567 if err != nil {
568 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
569 // Exit code 1 means not an ancestor
570 return fmt.Errorf("HEAD is not a descendant of the initial commit")
571 }
572 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
573 }
574 return nil
575}
576
Earl Lee2e463fb2025-04-17 11:22:22 -0700577// packagesForFiles returns maps of packages related to the given files:
578// 1. directPkgs: packages that directly contain the changed files
579// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
580// It may include false positives.
581// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000582func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700583 for _, f := range files {
584 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000585 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700586 }
587 }
588 cfg := &packages.Config{
589 Mode: packages.LoadImports | packages.NeedEmbedFiles,
590 Context: ctx,
591 // Logf: func(msg string, args ...any) {
592 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
593 // },
594 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
595 // We can cross that bridge when we get there.
596 Dir: r.repoRoot,
597 Tests: true,
598 }
599 universe, err := packages.Load(cfg, "./...")
600 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000601 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700602 }
603 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000604 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700605 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700606 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700607 for _, file := range files {
608 if pkgFiles[file] {
609 // prefer test packages, as they contain strictly more files (right?)
610 prev := directPkgs[pkg.PkgPath]
611 if prev == nil || prev.ForTest == "" {
612 directPkgs[pkg.PkgPath] = pkg
613 }
614 }
615 }
616 }
617
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000618 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700619
620 // Add packages that depend on the direct packages
621 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000622 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700623}
624
625// allFiles returns all files that might be referenced by the package.
626// It may contain false positives.
627func allFiles(p *packages.Package) map[string]bool {
628 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000629 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700630 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
631 for _, extra := range add {
632 for _, file := range extra {
633 files[file] = true
634 }
635 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000636 // Add files from testdata directory
637 testdataDir := filepath.Join(p.Dir, "testdata")
638 if _, err := os.Stat(testdataDir); err == nil {
639 fsys := os.DirFS(p.Dir)
640 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
641 if err == nil && !d.IsDir() {
642 files[filepath.Join(p.Dir, path)] = true
643 }
644 return nil
645 })
646 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700647 return files
648}
649
650// addDependentPackages adds to pkgs all packages from universe
651// that directly or indirectly depend on any package already in pkgs.
652func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
653 for {
654 changed := false
655 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000656 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
657 // skip test packages
658 continue
659 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700660 if _, ok := pkgs[p.PkgPath]; ok {
661 // already in pkgs
662 continue
663 }
664 for importPath := range p.Imports {
665 if _, ok := pkgs[importPath]; ok {
666 // imports a package dependent on pkgs, add it
667 pkgs[p.PkgPath] = p
668 changed = true
669 break
670 }
671 }
672 }
673 if !changed {
674 break
675 }
676 }
677}
678
679// testJSON is a union of BuildEvent and TestEvent
680type testJSON struct {
681 // TestEvent only:
682 // The Time field holds the time the event happened. It is conventionally omitted
683 // for cached test results.
684 Time time.Time `json:"Time"`
685 // BuildEvent only:
686 // The ImportPath field gives the package ID of the package being built.
687 // This matches the Package.ImportPath field of go list -json and the
688 // TestEvent.FailedBuild field of go test -json. Note that it does not
689 // match TestEvent.Package.
690 ImportPath string `json:"ImportPath"` // BuildEvent only
691 // TestEvent only:
692 // The Package field, if present, specifies the package being tested. When the
693 // go command runs parallel tests in -json mode, events from different tests are
694 // interlaced; the Package field allows readers to separate them.
695 Package string `json:"Package"`
696 // Action is used in both BuildEvent and TestEvent.
697 // It is the key to distinguishing between them.
698 // BuildEvent:
699 // build-output or build-fail
700 // TestEvent:
701 // start, run, pause, cont, pass, bench, fail, output, skip
702 Action string `json:"Action"`
703 // TestEvent only:
704 // The Test field, if present, specifies the test, example, or benchmark function
705 // that caused the event. Events for the overall package test do not set Test.
706 Test string `json:"Test"`
707 // TestEvent only:
708 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
709 // for the specific test or the overall package test that passed or failed.
710 Elapsed float64
711 // TestEvent:
712 // The Output field is set for Action == "output" and is a portion of the
713 // test's output (standard output and standard error merged together). The
714 // output is unmodified except that invalid UTF-8 output from a test is coerced
715 // into valid UTF-8 by use of replacement characters. With that one exception,
716 // the concatenation of the Output fields of all output events is the exact output
717 // of the test execution.
718 // BuildEvent:
719 // The Output field is set for Action == "build-output" and is a portion of
720 // the build's output. The concatenation of the Output fields of all output
721 // events is the exact output of the build. A single event may contain one
722 // or more lines of output and there may be more than one output event for
723 // a given ImportPath. This matches the definition of the TestEvent.Output
724 // field produced by go test -json.
725 Output string `json:"Output"`
726 // TestEvent only:
727 // The FailedBuild field is set for Action == "fail" if the test failure was caused
728 // by a build failure. It contains the package ID of the package that failed to
729 // build. This matches the ImportPath field of the "go list" output, as well as the
730 // BuildEvent.ImportPath field as emitted by "go build -json".
731 FailedBuild string `json:"FailedBuild"`
732}
733
734// parseTestResults converts test output in JSONL format into a slice of testJSON objects
735func parseTestResults(testOutput []byte) ([]testJSON, error) {
736 var results []testJSON
737 dec := json.NewDecoder(bytes.NewReader(testOutput))
738 for {
739 var event testJSON
740 if err := dec.Decode(&event); err != nil {
741 if err == io.EOF {
742 break
743 }
744 return nil, err
745 }
746 results = append(results, event)
747 }
748 return results, nil
749}
750
751// testStatus represents the status of a test in a given commit
752type testStatus int
753
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700754//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700755const (
756 testStatusUnknown testStatus = iota
757 testStatusPass
758 testStatusFail
759 testStatusBuildFail
760 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000761 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700762)
763
Earl Lee2e463fb2025-04-17 11:22:22 -0700764// testRegression represents a test that regressed between commits
765type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000766 Package string
767 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700768 BeforeStatus testStatus
769 AfterStatus testStatus
770 Output string // failure output in the after state
771}
772
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000773func (r *testRegression) Source() string {
774 if r.Test == "" {
775 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700776 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000777 return fmt.Sprintf("%s.%s", r.Package, r.Test)
778}
Earl Lee2e463fb2025-04-17 11:22:22 -0700779
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000780type packageResult struct {
781 Status testStatus // overall status for the package
782 TestStatus map[string]testStatus // name -> status
783 TestOutput map[string][]string // name -> output parts
784}
Earl Lee2e463fb2025-04-17 11:22:22 -0700785
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000786// collectTestStatuses processes a slice of test events and returns rich status information
787func collectTestStatuses(results []testJSON) map[string]*packageResult {
788 m := make(map[string]*packageResult)
789
790 for _, event := range results {
791 pkg := event.Package
792 p, ok := m[pkg]
793 if !ok {
794 p = new(packageResult)
795 p.TestStatus = make(map[string]testStatus)
796 p.TestOutput = make(map[string][]string)
797 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700798 }
799
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000800 switch event.Action {
801 case "output":
802 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700803 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700804 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000805 if event.Test == "" {
806 p.Status = testStatusPass
807 } else {
808 p.TestStatus[event.Test] = testStatusPass
809 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700810 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000811 if event.Test == "" {
812 if event.FailedBuild != "" {
813 p.Status = testStatusBuildFail
814 } else {
815 p.Status = testStatusFail
816 }
817 } else {
818 p.TestStatus[event.Test] = testStatusFail
819 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700820 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000821 if event.Test == "" {
822 p.Status = testStatusNoTests
823 } else {
824 p.TestStatus[event.Test] = testStatusSkip
825 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700826 }
827 }
828
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000829 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700830}
831
832// compareTestResults identifies tests that have regressed between commits
833func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000834 before := collectTestStatuses(beforeResults)
835 after := collectTestStatuses(afterResults)
836 var testLevelRegressions []testRegression
837 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700838
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000839 afterPkgs := slices.Sorted(maps.Keys(after))
840 for _, pkg := range afterPkgs {
841 afterResult := after[pkg]
842 afterStatus := afterResult.Status
843 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
844 beforeResult, ok := before[pkg]
845 beforeStatus := testStatusNoTests
846 if ok {
847 beforeStatus = beforeResult.Status
848 }
849 // If things no longer build, stop at the package level.
850 // Otherwise, proceed to the test level, so we have more precise information.
851 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
852 packageLevelRegressions = append(packageLevelRegressions, testRegression{
853 Package: pkg,
854 BeforeStatus: beforeStatus,
855 AfterStatus: afterStatus,
856 })
857 continue
858 }
859 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
860 for _, test := range tests {
861 afterStatus := afterResult.TestStatus[test]
862 switch afterStatus {
863 case testStatusPass:
864 continue
865 case testStatusUnknown:
866 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
867 continue
868 }
banksean07b74002025-06-26 16:05:25 +0000869 beforeStatus := testStatusUnknown
870 if beforeResult != nil {
871 beforeStatus = beforeResult.TestStatus[test]
872 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000873 if isRegression(beforeStatus, afterStatus) {
874 testLevelRegressions = append(testLevelRegressions, testRegression{
875 Package: pkg,
876 Test: test,
877 BeforeStatus: beforeStatus,
878 AfterStatus: afterStatus,
879 Output: strings.Join(afterResult.TestOutput[test], ""),
880 })
881 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700882 }
883 }
884
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000885 // If we have test-level regressions, report only those
886 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700887 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000888 if len(testLevelRegressions) > 0 {
889 regressions = testLevelRegressions
890 } else {
891 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700892 }
893
894 // Sort regressions for consistent output
895 slices.SortFunc(regressions, func(a, b testRegression) int {
896 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000897 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700898 return c
899 }
900 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000901 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700902 })
903
904 return regressions, nil
905}
906
907// badnessLevels maps test status to a badness level
908// Higher values indicate worse status (more severe issues)
909var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000910 testStatusBuildFail: 5, // Worst
911 testStatusFail: 4,
912 testStatusSkip: 3,
913 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700914 testStatusPass: 1,
915 testStatusUnknown: 0, // Least bad - avoids false positives
916}
917
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000918// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
919type regressionKey struct {
920 before, after testStatus
921}
922
923var regressionMessages = map[regressionKey]string{
924 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
925 {testStatusUnknown, testStatusFail}: "New test is failing",
926 {testStatusUnknown, testStatusSkip}: "New test is skipped",
927 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
928 {testStatusPass, testStatusFail}: "Was passing, now failing",
929 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
930 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
931 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
932 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
933 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
934 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
935 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
936}
937
Earl Lee2e463fb2025-04-17 11:22:22 -0700938// isRegression determines if a test has regressed based on before and after status
939// A regression is defined as an increase in badness level
940func isRegression(before, after testStatus) bool {
941 // Higher badness level means worse status
942 return badnessLevels[after] > badnessLevels[before]
943}
944
945// formatTestRegressions generates a human-readable summary of test regressions
946func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
947 if len(regressions) == 0 {
948 return ""
949 }
950
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000951 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700952 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700953
954 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000955 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
956 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
957 message, exists := regressionMessages[key]
958 if !exists {
959 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700960 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000961 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700962 }
963
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000964 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700965}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000966
967// RelatedFile represents a file historically related to the changed files
968type RelatedFile struct {
969 Path string // Path to the file
970 Correlation float64 // Correlation score (0.0-1.0)
971}
972
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +0000973// hashChangedFiles creates a deterministic hash of the changed files set
974func (r *CodeReviewer) hashChangedFiles(changedFiles []string) string {
975 // Sort files for deterministic hashing
976 sorted := slices.Clone(changedFiles)
977 slices.Sort(sorted)
978 h := sha256.New()
979 enc := json.NewEncoder(h)
980 err := enc.Encode(sorted)
981 if err != nil {
982 panic(err)
983 }
984 return hex.EncodeToString(h.Sum(nil))
985}
986
987// findRelatedFiles reports files that are historically related to the changed files
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000988// by analyzing git commit history for co-occurrences.
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +0000989// This function implements caching to avoid duplicate CPU and LLM processing:
990// 1. If the exact same set of changedFiles has been processed before, return nil, nil
991// 2. If all related files have been previously reported, return nil, nil
992// 3. Otherwise, return the full set of related files and mark them as reported
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000993func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
Josh Bleecher Snyder26b6f9b2025-07-01 01:41:11 +0000994 cf := r.hashChangedFiles(changedFiles)
995 if r.processedChangedFileSets[cf] {
996 return nil, nil
997 }
998 r.processedChangedFileSets[cf] = true // don't re-process, even on error
999
1000 relatedFiles, err := r.computeRelatedFiles(ctx, changedFiles)
1001 if err != nil {
1002 return nil, err
1003 }
1004
1005 hasNew := false
1006 for _, rf := range relatedFiles {
1007 if !r.reportedRelatedFiles[rf.Path] {
1008 hasNew = true
1009 break
1010 }
1011 }
1012 if !hasNew {
1013 return nil, nil
1014 }
1015
1016 // We have new file(s) that haven't been called to the LLM's attention yet.
1017 for _, rf := range relatedFiles {
1018 r.reportedRelatedFiles[rf.Path] = true
1019 }
1020
1021 return relatedFiles, nil
1022}
1023
1024// computeRelatedFiles implements findRelatedFiles, without caching.
1025func (r *CodeReviewer) computeRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001026 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
1027 if err != nil {
1028 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
1029 }
1030 if len(commits) == 0 {
1031 return nil, nil
1032 }
1033
1034 relChanged := make(map[string]bool, len(changedFiles))
1035 for _, file := range changedFiles {
1036 rel, err := filepath.Rel(r.repoRoot, file)
1037 if err != nil {
1038 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
1039 }
1040 relChanged[rel] = true
1041 }
1042
1043 historyFiles := make(map[string]int)
1044 var historyMu sync.Mutex
1045
1046 maxWorkers := runtime.GOMAXPROCS(0)
1047 semaphore := make(chan bool, maxWorkers)
1048 var wg sync.WaitGroup
1049
1050 for _, commit := range commits {
1051 wg.Add(1)
1052 semaphore <- true // acquire
1053
1054 go func(commit string) {
1055 defer wg.Done()
1056 defer func() { <-semaphore }() // release
1057 commitFiles, err := r.getFilesInCommit(ctx, commit)
1058 if err != nil {
1059 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
1060 return
1061 }
1062 incr := 0
1063 for _, file := range commitFiles {
1064 if relChanged[file] {
1065 incr++
1066 }
1067 }
1068 if incr == 0 {
1069 return
1070 }
1071 historyMu.Lock()
1072 defer historyMu.Unlock()
1073 for _, file := range commitFiles {
1074 historyFiles[file] += incr
1075 }
1076 }(commit)
1077 }
1078 wg.Wait()
1079
1080 // normalize
1081 maxCount := 0
1082 for _, count := range historyFiles {
1083 maxCount = max(maxCount, count)
1084 }
1085 if maxCount == 0 {
1086 return nil, nil
1087 }
1088
1089 var relatedFiles []RelatedFile
1090 for file, count := range historyFiles {
1091 if relChanged[file] {
1092 // Don't include inputs in the output.
1093 continue
1094 }
1095 correlation := float64(count) / float64(maxCount)
1096 // Require min correlation to avoid noise
1097 if correlation >= 0.1 {
Pokey Rule75f93a12025-05-15 13:51:55 +00001098 // Check if the file still exists in the repository
1099 fullPath := filepath.Join(r.repoRoot, file)
1100 if _, err := os.Stat(fullPath); err == nil {
1101 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
1102 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001103 }
1104 }
1105
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001106 // Highest correlation first, then sort by path.
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001107 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
Josh Bleecher Snydera727f702025-06-04 18:47:33 -07001108 return cmp.Or(
1109 -1*cmp.Compare(a.Correlation, b.Correlation),
1110 cmp.Compare(a.Path, b.Path),
1111 )
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001112 })
1113
1114 // Limit to 1 correlated file per input file.
1115 // (Arbitrary limit, to be adjusted.)
1116 maxFiles := len(changedFiles)
1117 if len(relatedFiles) > maxFiles {
1118 relatedFiles = relatedFiles[:maxFiles]
1119 }
1120
1121 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1122 // and then increase the strength of the wording in the relatedFiles message.
1123
1124 return relatedFiles, nil
1125}
1126
1127// getCommitsTouchingFiles returns all commits that touch any of the specified files
1128func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1129 if len(files) == 0 {
1130 return nil, nil
1131 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001132 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001133 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1134 cmd.Dir = r.repoRoot
1135 out, err := cmd.CombinedOutput()
1136 if err != nil {
1137 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1138 }
1139 return nonEmptyTrimmedLines(out), nil
1140}
1141
1142// getFilesInCommit returns all files changed in a specific commit
1143func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1144 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1145 cmd.Dir = r.repoRoot
1146 out, err := cmd.CombinedOutput()
1147 if err != nil {
1148 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1149 }
1150 return nonEmptyTrimmedLines(out), nil
1151}
1152
1153func nonEmptyTrimmedLines(b []byte) []string {
1154 var lines []string
1155 for line := range strings.Lines(string(b)) {
1156 line = strings.TrimSpace(line)
1157 if line != "" {
1158 lines = append(lines, line)
1159 }
1160 }
1161 return lines
1162}
1163
1164// formatRelatedFiles formats the related files list into a human-readable message
1165func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1166 if len(files) == 0 {
1167 return ""
1168 }
1169
1170 buf := new(strings.Builder)
1171
1172 fmt.Fprintf(buf, "Potentially related files:\n\n")
1173
1174 for _, file := range files {
1175 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1176 }
1177
1178 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1179 return buf.String()
1180}
Josh Bleecher Snyderde5f7442025-05-15 18:32:32 +00001181
1182// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
1183func shouldIgnoreDiagnostic(message string) bool {
1184 for _, pattern := range goplsIgnore {
1185 if strings.Contains(message, pattern) {
1186 return true
1187 }
1188 }
1189 return false
1190}
Josh Bleecher Snyder6534c7a2025-07-01 01:48:52 +00001191
1192// WarmTestCache runs 'go test -c' on relevant packages in the background
1193// to warm up the Go build cache. This is intended to be called after patch
1194// operations to prepare for future differential testing.
1195// It uses the base commit (before state) to warm cache for packages that
1196// will likely be tested during code review.
1197func (r *CodeReviewer) WarmTestCache(modifiedFile string) {
1198 if !r.isGoRepository() {
1199 return
1200 }
1201 if !strings.HasSuffix(modifiedFile, ".go") {
1202 return
1203 }
1204
1205 // Worktree must be created serially
1206 ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
1207 if err := r.initializeInitialCommitWorktree(ctx); err != nil {
1208 cancel()
1209 return
1210 }
1211
1212 go func() {
1213 defer cancel()
1214
1215 if err := r.warmTestCache(ctx, modifiedFile); err != nil {
1216 slog.DebugContext(ctx, "cache warming failed", "err", err)
1217 }
1218 }()
1219}
1220
1221func (r *CodeReviewer) warmTestCache(ctx context.Context, modifiedFile string) error {
1222 allPkgs, err := r.packagesForFiles(ctx, []string{r.absPath(modifiedFile)})
1223 if err != nil {
1224 return fmt.Errorf("failed to get packages for files: %w", err)
1225 }
1226 if len(allPkgs) == 0 {
1227 return nil
1228 }
1229
1230 var pkgPaths []string
1231 r.warmMutex.Lock()
1232 for pkgPath := range allPkgs {
1233 if strings.HasSuffix(pkgPath, ".test") {
1234 continue
1235 }
1236 if r.warmedPackages[pkgPath] {
1237 continue
1238 }
1239 // One attempt is enough.
1240 r.warmedPackages[pkgPath] = true
1241 pkgPaths = append(pkgPaths, pkgPath)
1242 }
1243 r.warmMutex.Unlock()
1244
1245 if len(pkgPaths) == 0 {
1246 return nil
1247 }
1248
1249 // Avoid stressing the machine: max 2 concurrent processes.
1250 args := []string{"test", "-c", "-p", "2", "-o", "/dev/null"}
1251 args = append(args, pkgPaths...)
1252
1253 cmd := exec.CommandContext(ctx, "go", args...)
1254 cmd.Dir = r.initialWorktree
1255 cmd.Stdout = io.Discard
1256 cmd.Stderr = io.Discard
1257
1258 slog.DebugContext(ctx, "warming test cache", "packages", len(pkgPaths), "worktree", r.initialWorktree)
1259
1260 start := time.Now()
1261 // Run the command and ignore errors - this is best effort
1262 err = cmd.Run()
1263 slog.DebugContext(ctx, "cache warming complete", "duration", time.Since(start), "error", err)
1264 return nil
1265}