blob: f12b4f04326890e15aa14a2e6c6adf06ab4b35d3 [file] [log] [blame]
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00001package codereview
Earl Lee2e463fb2025-04-17 11:22:22 -07002
3import (
4 "bytes"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00005 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "context"
7 "encoding/json"
8 "fmt"
9 "io"
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000010 "io/fs"
Earl Lee2e463fb2025-04-17 11:22:22 -070011 "log/slog"
12 "maps"
13 "os"
14 "os/exec"
15 "path/filepath"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000016 "runtime"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "slices"
18 "strings"
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000019 "sync"
Earl Lee2e463fb2025-04-17 11:22:22 -070020 "time"
21
22 "golang.org/x/tools/go/packages"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070024)
25
26// This file does differential quality analysis of a commit relative to a base commit.
27
28// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070029func (r *CodeReviewer) Tool() *llm.Tool {
30 spec := &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070031 Name: "codereview",
32 Description: `Run an automated code review.`,
33 // If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070034 InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
Earl Lee2e463fb2025-04-17 11:22:22 -070035 Run: r.Run,
36 }
37 return spec
38}
39
Philip Zeyliger72252cb2025-05-10 17:00:08 -070040func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +000041 // NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
42 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee2e463fb2025-04-17 11:22:22 -070043 if err := r.RequireNormalGitState(ctx); err != nil {
44 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070045 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070046 }
47 if err := r.RequireNoUncommittedChanges(ctx); err != nil {
48 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070049 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070050 }
51
52 // Check that the current commit is not the initial commit
53 currentCommit, err := r.CurrentCommit(ctx)
54 if err != nil {
55 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070056 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070057 }
58 if r.IsInitialCommit(currentCommit) {
59 slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Philip Zeyliger72252cb2025-05-10 17:00:08 -070060 return nil, fmt.Errorf("no new commits have been added, nothing to review")
Earl Lee2e463fb2025-04-17 11:22:22 -070061 }
62
63 // No matter what failures happen from here out, we will declare this to have been reviewed.
64 // This should help avoid the model getting blocked by a broken code review tool.
65 r.reviewed = append(r.reviewed, currentCommit)
66
Philip Zeyliger49edc922025-05-14 09:45:45 -070067 changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
Earl Lee2e463fb2025-04-17 11:22:22 -070068 if err != nil {
69 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070070 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070071 }
72
73 // Prepare to analyze before/after for the impacted files.
74 // We use the current commit to determine what packages exist and are impacted.
75 // The packages in the initial commit may be different.
76 // Good enough for now.
77 // TODO: do better
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +000078 allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee2e463fb2025-04-17 11:22:22 -070079 if err != nil {
80 // TODO: log and skip to stuff that doesn't require packages
81 slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -070082 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -070083 }
84 allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee2e463fb2025-04-17 11:22:22 -070085
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +000086 var errorMessages []string // problems we want the model to address
87 var infoMessages []string // info the model should consider
88
89 // Find potentially related files that should also be considered
90 // TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
91 relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
92 if err != nil {
93 slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
94 } else {
95 relatedMsg := r.formatRelatedFiles(relatedFiles)
96 if relatedMsg != "" {
97 infoMessages = append(infoMessages, relatedMsg)
98 }
99 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700100
101 testMsg, err := r.checkTests(ctx, allPkgList)
102 if err != nil {
103 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700104 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700105 }
106 if testMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000107 errorMessages = append(errorMessages, testMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700108 }
109
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000110 goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee2e463fb2025-04-17 11:22:22 -0700111 if err != nil {
112 slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700113 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700114 }
115 if goplsMsg != "" {
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000116 errorMessages = append(errorMessages, goplsMsg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 }
118
Josh Bleecher Snydere2518e52025-04-29 11:13:40 -0700119 if r.llmReview {
120 llmComments, err := r.doLLMReview(ctx)
121 if err != nil {
122 // Log the error but don't fail the codereview if this check fails
123 slog.WarnContext(ctx, "CodeReviewer.Run: error doing LLM review", "err", err)
124 }
125 if llmComments != "" {
126 infoMessages = append(infoMessages, llmComments)
127 }
128 }
129
Josh Bleecher Snyder2dc86b92025-04-29 14:11:58 +0000130 // NOTE: If you change this output format, update the corresponding UI parsing in:
131 // webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000132 buf := new(strings.Builder)
133 if len(infoMessages) > 0 {
134 buf.WriteString("# Info\n\n")
135 buf.WriteString(strings.Join(infoMessages, "\n\n"))
136 buf.WriteString("\n\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700137 }
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000138 if len(errorMessages) > 0 {
139 buf.WriteString("# Errors\n\n")
140 buf.WriteString(strings.Join(errorMessages, "\n\n"))
141 buf.WriteString("\n\nPlease fix before proceeding.\n")
142 }
143 if buf.Len() == 0 {
144 buf.WriteString("OK")
145 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700146 return llm.TextContent(buf.String()), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700147}
148
149func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
150 if r.initialWorktree != "" {
151 return nil
152 }
153 tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
154 if err != nil {
155 return err
156 }
Philip Zeyliger49edc922025-05-14 09:45:45 -0700157 worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700158 worktreeCmd.Dir = r.repoRoot
159 out, err := worktreeCmd.CombinedOutput()
160 if err != nil {
161 return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
162 }
163 r.initialWorktree = tmpDir
164 return nil
165}
166
167func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
168 goTestArgs := []string{"test", "-json", "-v"}
169 goTestArgs = append(goTestArgs, pkgList...)
170
171 afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
172 afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000173 afterTestOut, _ := afterTestCmd.Output()
174 // unfortunately, we can't short-circuit here even if all tests pass,
175 // because we need to check for skipped tests.
Earl Lee2e463fb2025-04-17 11:22:22 -0700176
177 err := r.initializeInitialCommitWorktree(ctx)
178 if err != nil {
179 return "", err
180 }
181
182 beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
183 beforeTestCmd.Dir = r.initialWorktree
184 beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
185
186 // Parse the jsonl test results
187 beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
188 if beforeParseErr != nil {
189 return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
190 }
191 afterResults, afterParseErr := parseTestResults(afterTestOut)
192 if afterParseErr != nil {
193 return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
194 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700195 testRegressions, err := r.compareTestResults(beforeResults, afterResults)
196 if err != nil {
197 return "", fmt.Errorf("failed to compare test results: %w", err)
198 }
199 // TODO: better output formatting?
200 res := r.formatTestRegressions(testRegressions)
201 return res, nil
202}
203
Earl Lee2e463fb2025-04-17 11:22:22 -0700204// GoplsIssue represents a single issue reported by gopls check
205type GoplsIssue struct {
206 Position string // File position in format "file:line:col-range"
207 Message string // Description of the issue
208}
209
210// checkGopls runs gopls check on the provided files in both the current and initial state,
211// compares the results, and reports any new issues introduced in the current state.
212func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
213 if len(changedFiles) == 0 {
214 return "", nil // no files to check
215 }
216
217 // Filter out non-Go files as gopls only works on Go files
218 // and verify they still exist (not deleted)
219 var goFiles []string
220 for _, file := range changedFiles {
221 if !strings.HasSuffix(file, ".go") {
222 continue // not a Go file
223 }
224
225 // Check if the file still exists (not deleted)
226 if _, err := os.Stat(file); os.IsNotExist(err) {
227 continue // file doesn't exist anymore (deleted)
228 }
229
230 goFiles = append(goFiles, file)
231 }
232
233 if len(goFiles) == 0 {
234 return "", nil // no Go files to check
235 }
236
237 // Run gopls check on the current state
238 goplsArgs := append([]string{"check"}, goFiles...)
239
240 afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
241 afterGoplsCmd.Dir = r.repoRoot
242 afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
243 if err != nil {
244 // Check if the output looks like real gopls issues or if it's just error output
245 if !looksLikeGoplsIssues(afterGoplsOut) {
246 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
247 return "", nil // Skip rather than failing the entire code review
248 }
249 // Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
250 }
251
252 // Parse the output
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000253 afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700254
255 // If no issues were found, we're done
256 if len(afterIssues) == 0 {
257 return "", nil
258 }
259
260 // Gopls detected issues in the current state, check if they existed in the initial state
261 initErr := r.initializeInitialCommitWorktree(ctx)
262 if initErr != nil {
263 return "", err
264 }
265
266 // For each file that exists in the initial commit, run gopls check
267 var initialFilesToCheck []string
268 for _, file := range goFiles {
269 // Get relative path for git operations
270 relFile, err := filepath.Rel(r.repoRoot, file)
271 if err != nil {
272 slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
273 continue
274 }
275
276 // Check if the file exists in the initial commit
Philip Zeyliger49edc922025-05-14 09:45:45 -0700277 checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee2e463fb2025-04-17 11:22:22 -0700278 checkCmd.Dir = r.repoRoot
279 if err := checkCmd.Run(); err == nil {
280 // File exists in initial commit
281 initialFilePath := filepath.Join(r.initialWorktree, relFile)
282 initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
283 }
284 }
285
286 // Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000287 var beforeIssues []GoplsIssue
Earl Lee2e463fb2025-04-17 11:22:22 -0700288 if len(initialFilesToCheck) > 0 {
289 beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
290 beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
291 beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000292 beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee2e463fb2025-04-17 11:22:22 -0700293 if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
294 // If gopls fails to run properly on the initial commit, log a warning and continue
295 // with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000296 slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee2e463fb2025-04-17 11:22:22 -0700297 } else {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000298 beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee2e463fb2025-04-17 11:22:22 -0700299 }
300 }
301
302 // Find new issues that weren't present in the initial state
303 goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
304 if len(goplsRegressions) == 0 {
305 return "", nil // no new issues
306 }
307
308 // Format the results
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000309 return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700310}
311
312// parseGoplsOutput parses the text output from gopls check
313// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000314func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee2e463fb2025-04-17 11:22:22 -0700315 var issues []GoplsIssue
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000316 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700317 line = strings.TrimSpace(line)
318 if line == "" {
319 continue
320 }
321
322 // Skip lines that look like error messages rather than gopls issues
323 if strings.HasPrefix(line, "Error:") ||
324 strings.HasPrefix(line, "Failed:") ||
325 strings.HasPrefix(line, "Warning:") ||
326 strings.HasPrefix(line, "gopls:") {
327 continue
328 }
329
330 // Find the first colon that separates the file path from the line number
331 firstColonIdx := strings.Index(line, ":")
332 if firstColonIdx < 0 {
333 continue // Invalid format
334 }
335
336 // Verify the part before the first colon looks like a file path
337 potentialPath := line[:firstColonIdx]
338 if !strings.HasSuffix(potentialPath, ".go") {
339 continue // Not a Go file path
340 }
341
342 // Find the position of the first message separator ': '
343 // This separates the position info from the message
344 messageStart := strings.Index(line, ": ")
345 if messageStart < 0 || messageStart <= firstColonIdx {
346 continue // Invalid format
347 }
348
349 // Extract position and message
350 position := line[:messageStart]
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000351 rel, err := filepath.Rel(root, position)
352 if err == nil {
353 position = rel
354 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700355 message := line[messageStart+2:] // Skip the ': ' separator
356
357 // Verify position has the expected format (at least 2 colons for line:col)
358 colonCount := strings.Count(position, ":")
359 if colonCount < 2 {
360 continue // Not enough position information
361 }
362
363 issues = append(issues, GoplsIssue{
364 Position: position,
365 Message: message,
366 })
367 }
368
369 return issues
370}
371
372// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
373// rather than error messages about gopls itself failing
374func looksLikeGoplsIssues(output []byte) bool {
375 // If output is empty, it's not valid issues
376 if len(output) == 0 {
377 return false
378 }
379
380 // Check if output has at least one line that looks like a gopls issue
381 // A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000382 for line := range strings.Lines(string(output)) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700383 line = strings.TrimSpace(line)
384 if line == "" {
385 continue
386 }
387
388 // A gopls issue has at least two colons (file path, line number, column)
389 // and contains a colon followed by a space (separating position from message)
390 colonCount := strings.Count(line, ":")
391 hasSeparator := strings.Contains(line, ": ")
392
393 if colonCount >= 2 && hasSeparator {
394 // Check if it starts with a likely file path (ending in .go)
395 parts := strings.SplitN(line, ":", 2)
396 if strings.HasSuffix(parts[0], ".go") {
397 return true
398 }
399 }
400 }
401 return false
402}
403
404// normalizeGoplsPosition extracts just the file path from a position string
405func normalizeGoplsPosition(position string) string {
406 // Extract just the file path by taking everything before the first colon
407 parts := strings.Split(position, ":")
408 if len(parts) < 1 {
409 return position
410 }
411 return parts[0]
412}
413
414// findGoplsRegressions identifies gopls issues that are new in the after state
415func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
416 var regressions []GoplsIssue
417
418 // Build map of before issues for easier lookup
419 beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
420 for _, issue := range before {
421 file := normalizeGoplsPosition(issue.Position)
422 if _, exists := beforeIssueMap[file]; !exists {
423 beforeIssueMap[file] = make(map[string]bool)
424 }
425 // Store both the exact message and the general issue type for fuzzy matching
426 beforeIssueMap[file][issue.Message] = true
427
428 // Extract the general issue type (everything before the first ':' in the message)
429 generalIssue := issue.Message
430 if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
431 generalIssue = issue.Message[:colonIdx]
432 }
433 beforeIssueMap[file][generalIssue] = true
434 }
435
436 // Check each after issue to see if it's new
437 for _, afterIssue := range after {
438 file := normalizeGoplsPosition(afterIssue.Position)
439 isNew := true
440
441 if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
442 // Check for exact message match
443 if fileIssues[afterIssue.Message] {
444 isNew = false
445 } else {
446 // Check for general issue type match
447 generalIssue := afterIssue.Message
448 if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
449 generalIssue = afterIssue.Message[:colonIdx]
450 }
451 if fileIssues[generalIssue] {
452 isNew = false
453 }
454 }
455 }
456
457 if isNew {
458 regressions = append(regressions, afterIssue)
459 }
460 }
461
462 // Sort regressions for deterministic output
463 slices.SortFunc(regressions, func(a, b GoplsIssue) int {
464 return strings.Compare(a.Position, b.Position)
465 })
466
467 return regressions
468}
469
470// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000471func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee2e463fb2025-04-17 11:22:22 -0700472 if len(regressions) == 0 {
473 return ""
474 }
475
476 var sb strings.Builder
477 sb.WriteString("Gopls check issues detected:\n\n")
478
479 // Format each issue
480 for i, issue := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000481 sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee2e463fb2025-04-17 11:22:22 -0700482 }
483
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000484 sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
485 sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee2e463fb2025-04-17 11:22:22 -0700486 return sb.String()
487}
488
489func (r *CodeReviewer) HasReviewed(commit string) bool {
490 return slices.Contains(r.reviewed, commit)
491}
492
493func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger49edc922025-05-14 09:45:45 -0700494 return commit == r.sketchBaseRef
Earl Lee2e463fb2025-04-17 11:22:22 -0700495}
496
Philip Zeyliger49edc922025-05-14 09:45:45 -0700497// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000498// This serves two purposes:
499// - ensures we're not still on the initial commit
500// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger49edc922025-05-14 09:45:45 -0700501func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000502 head, err := r.CurrentCommit(ctx)
503 if err != nil {
504 return err
505 }
506
507 // Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger49edc922025-05-14 09:45:45 -0700508 cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyderc72ceb22025-05-05 23:30:15 +0000509 cmd.Dir = r.repoRoot
510 err = cmd.Run()
511 if err != nil {
512 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
513 // Exit code 1 means not an ancestor
514 return fmt.Errorf("HEAD is not a descendant of the initial commit")
515 }
516 return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
517 }
518 return nil
519}
520
Earl Lee2e463fb2025-04-17 11:22:22 -0700521// packagesForFiles returns maps of packages related to the given files:
522// 1. directPkgs: packages that directly contain the changed files
523// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
524// It may include false positives.
525// Files must be absolute paths!
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000526func (r *CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]*packages.Package, err error) {
Earl Lee2e463fb2025-04-17 11:22:22 -0700527 for _, f := range files {
528 if !filepath.IsAbs(f) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000529 return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee2e463fb2025-04-17 11:22:22 -0700530 }
531 }
532 cfg := &packages.Config{
533 Mode: packages.LoadImports | packages.NeedEmbedFiles,
534 Context: ctx,
535 // Logf: func(msg string, args ...any) {
536 // slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
537 // },
538 // TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
539 // We can cross that bridge when we get there.
540 Dir: r.repoRoot,
541 Tests: true,
542 }
543 universe, err := packages.Load(cfg, "./...")
544 if err != nil {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000545 return nil, err
Earl Lee2e463fb2025-04-17 11:22:22 -0700546 }
547 // Identify packages that directly contain the changed files
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000548 directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee2e463fb2025-04-17 11:22:22 -0700549 for _, pkg := range universe {
Earl Lee2e463fb2025-04-17 11:22:22 -0700550 pkgFiles := allFiles(pkg)
Earl Lee2e463fb2025-04-17 11:22:22 -0700551 for _, file := range files {
552 if pkgFiles[file] {
553 // prefer test packages, as they contain strictly more files (right?)
554 prev := directPkgs[pkg.PkgPath]
555 if prev == nil || prev.ForTest == "" {
556 directPkgs[pkg.PkgPath] = pkg
557 }
558 }
559 }
560 }
561
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000562 allPkgs = maps.Clone(directPkgs)
Earl Lee2e463fb2025-04-17 11:22:22 -0700563
564 // Add packages that depend on the direct packages
565 addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000566 return allPkgs, nil
Earl Lee2e463fb2025-04-17 11:22:22 -0700567}
568
569// allFiles returns all files that might be referenced by the package.
570// It may contain false positives.
571func allFiles(p *packages.Package) map[string]bool {
572 files := make(map[string]bool)
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000573 // Add files from package info
Earl Lee2e463fb2025-04-17 11:22:22 -0700574 add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
575 for _, extra := range add {
576 for _, file := range extra {
577 files[file] = true
578 }
579 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000580 // Add files from testdata directory
581 testdataDir := filepath.Join(p.Dir, "testdata")
582 if _, err := os.Stat(testdataDir); err == nil {
583 fsys := os.DirFS(p.Dir)
584 fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
585 if err == nil && !d.IsDir() {
586 files[filepath.Join(p.Dir, path)] = true
587 }
588 return nil
589 })
590 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700591 return files
592}
593
594// addDependentPackages adds to pkgs all packages from universe
595// that directly or indirectly depend on any package already in pkgs.
596func addDependentPackages(universe []*packages.Package, pkgs map[string]*packages.Package) {
597 for {
598 changed := false
599 for _, p := range universe {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000600 if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
601 // skip test packages
602 continue
603 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700604 if _, ok := pkgs[p.PkgPath]; ok {
605 // already in pkgs
606 continue
607 }
608 for importPath := range p.Imports {
609 if _, ok := pkgs[importPath]; ok {
610 // imports a package dependent on pkgs, add it
611 pkgs[p.PkgPath] = p
612 changed = true
613 break
614 }
615 }
616 }
617 if !changed {
618 break
619 }
620 }
621}
622
623// testJSON is a union of BuildEvent and TestEvent
624type testJSON struct {
625 // TestEvent only:
626 // The Time field holds the time the event happened. It is conventionally omitted
627 // for cached test results.
628 Time time.Time `json:"Time"`
629 // BuildEvent only:
630 // The ImportPath field gives the package ID of the package being built.
631 // This matches the Package.ImportPath field of go list -json and the
632 // TestEvent.FailedBuild field of go test -json. Note that it does not
633 // match TestEvent.Package.
634 ImportPath string `json:"ImportPath"` // BuildEvent only
635 // TestEvent only:
636 // The Package field, if present, specifies the package being tested. When the
637 // go command runs parallel tests in -json mode, events from different tests are
638 // interlaced; the Package field allows readers to separate them.
639 Package string `json:"Package"`
640 // Action is used in both BuildEvent and TestEvent.
641 // It is the key to distinguishing between them.
642 // BuildEvent:
643 // build-output or build-fail
644 // TestEvent:
645 // start, run, pause, cont, pass, bench, fail, output, skip
646 Action string `json:"Action"`
647 // TestEvent only:
648 // The Test field, if present, specifies the test, example, or benchmark function
649 // that caused the event. Events for the overall package test do not set Test.
650 Test string `json:"Test"`
651 // TestEvent only:
652 // The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
653 // for the specific test or the overall package test that passed or failed.
654 Elapsed float64
655 // TestEvent:
656 // The Output field is set for Action == "output" and is a portion of the
657 // test's output (standard output and standard error merged together). The
658 // output is unmodified except that invalid UTF-8 output from a test is coerced
659 // into valid UTF-8 by use of replacement characters. With that one exception,
660 // the concatenation of the Output fields of all output events is the exact output
661 // of the test execution.
662 // BuildEvent:
663 // The Output field is set for Action == "build-output" and is a portion of
664 // the build's output. The concatenation of the Output fields of all output
665 // events is the exact output of the build. A single event may contain one
666 // or more lines of output and there may be more than one output event for
667 // a given ImportPath. This matches the definition of the TestEvent.Output
668 // field produced by go test -json.
669 Output string `json:"Output"`
670 // TestEvent only:
671 // The FailedBuild field is set for Action == "fail" if the test failure was caused
672 // by a build failure. It contains the package ID of the package that failed to
673 // build. This matches the ImportPath field of the "go list" output, as well as the
674 // BuildEvent.ImportPath field as emitted by "go build -json".
675 FailedBuild string `json:"FailedBuild"`
676}
677
678// parseTestResults converts test output in JSONL format into a slice of testJSON objects
679func parseTestResults(testOutput []byte) ([]testJSON, error) {
680 var results []testJSON
681 dec := json.NewDecoder(bytes.NewReader(testOutput))
682 for {
683 var event testJSON
684 if err := dec.Decode(&event); err != nil {
685 if err == io.EOF {
686 break
687 }
688 return nil, err
689 }
690 results = append(results, event)
691 }
692 return results, nil
693}
694
695// testStatus represents the status of a test in a given commit
696type testStatus int
697
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700698//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee2e463fb2025-04-17 11:22:22 -0700699const (
700 testStatusUnknown testStatus = iota
701 testStatusPass
702 testStatusFail
703 testStatusBuildFail
704 testStatusSkip
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000705 testStatusNoTests // no tests exist for this package
Earl Lee2e463fb2025-04-17 11:22:22 -0700706)
707
Earl Lee2e463fb2025-04-17 11:22:22 -0700708// testRegression represents a test that regressed between commits
709type testRegression struct {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000710 Package string
711 Test string // empty for package tests
Earl Lee2e463fb2025-04-17 11:22:22 -0700712 BeforeStatus testStatus
713 AfterStatus testStatus
714 Output string // failure output in the after state
715}
716
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000717func (r *testRegression) Source() string {
718 if r.Test == "" {
719 return r.Package
Earl Lee2e463fb2025-04-17 11:22:22 -0700720 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000721 return fmt.Sprintf("%s.%s", r.Package, r.Test)
722}
Earl Lee2e463fb2025-04-17 11:22:22 -0700723
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000724type packageResult struct {
725 Status testStatus // overall status for the package
726 TestStatus map[string]testStatus // name -> status
727 TestOutput map[string][]string // name -> output parts
728}
Earl Lee2e463fb2025-04-17 11:22:22 -0700729
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000730// collectTestStatuses processes a slice of test events and returns rich status information
731func collectTestStatuses(results []testJSON) map[string]*packageResult {
732 m := make(map[string]*packageResult)
733
734 for _, event := range results {
735 pkg := event.Package
736 p, ok := m[pkg]
737 if !ok {
738 p = new(packageResult)
739 p.TestStatus = make(map[string]testStatus)
740 p.TestOutput = make(map[string][]string)
741 m[pkg] = p
Earl Lee2e463fb2025-04-17 11:22:22 -0700742 }
743
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000744 switch event.Action {
745 case "output":
746 p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee2e463fb2025-04-17 11:22:22 -0700747 continue
Earl Lee2e463fb2025-04-17 11:22:22 -0700748 case "pass":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000749 if event.Test == "" {
750 p.Status = testStatusPass
751 } else {
752 p.TestStatus[event.Test] = testStatusPass
753 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700754 case "fail":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000755 if event.Test == "" {
756 if event.FailedBuild != "" {
757 p.Status = testStatusBuildFail
758 } else {
759 p.Status = testStatusFail
760 }
761 } else {
762 p.TestStatus[event.Test] = testStatusFail
763 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700764 case "skip":
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000765 if event.Test == "" {
766 p.Status = testStatusNoTests
767 } else {
768 p.TestStatus[event.Test] = testStatusSkip
769 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700770 }
771 }
772
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000773 return m
Earl Lee2e463fb2025-04-17 11:22:22 -0700774}
775
776// compareTestResults identifies tests that have regressed between commits
777func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000778 before := collectTestStatuses(beforeResults)
779 after := collectTestStatuses(afterResults)
780 var testLevelRegressions []testRegression
781 var packageLevelRegressions []testRegression
Earl Lee2e463fb2025-04-17 11:22:22 -0700782
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000783 afterPkgs := slices.Sorted(maps.Keys(after))
784 for _, pkg := range afterPkgs {
785 afterResult := after[pkg]
786 afterStatus := afterResult.Status
787 // Can't short-circuit here when tests are passing, because we need to check for skipped tests.
788 beforeResult, ok := before[pkg]
789 beforeStatus := testStatusNoTests
790 if ok {
791 beforeStatus = beforeResult.Status
792 }
793 // If things no longer build, stop at the package level.
794 // Otherwise, proceed to the test level, so we have more precise information.
795 if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
796 packageLevelRegressions = append(packageLevelRegressions, testRegression{
797 Package: pkg,
798 BeforeStatus: beforeStatus,
799 AfterStatus: afterStatus,
800 })
801 continue
802 }
803 tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
804 for _, test := range tests {
805 afterStatus := afterResult.TestStatus[test]
806 switch afterStatus {
807 case testStatusPass:
808 continue
809 case testStatusUnknown:
810 slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
811 continue
812 }
813 beforeStatus := beforeResult.TestStatus[test]
814 if isRegression(beforeStatus, afterStatus) {
815 testLevelRegressions = append(testLevelRegressions, testRegression{
816 Package: pkg,
817 Test: test,
818 BeforeStatus: beforeStatus,
819 AfterStatus: afterStatus,
820 Output: strings.Join(afterResult.TestOutput[test], ""),
821 })
822 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700823 }
824 }
825
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000826 // If we have test-level regressions, report only those
827 // Otherwise, report package-level regressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700828 var regressions []testRegression
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000829 if len(testLevelRegressions) > 0 {
830 regressions = testLevelRegressions
831 } else {
832 regressions = packageLevelRegressions
Earl Lee2e463fb2025-04-17 11:22:22 -0700833 }
834
835 // Sort regressions for consistent output
836 slices.SortFunc(regressions, func(a, b testRegression) int {
837 // First by package
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000838 if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee2e463fb2025-04-17 11:22:22 -0700839 return c
840 }
841 // Then by test name
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000842 return strings.Compare(a.Test, b.Test)
Earl Lee2e463fb2025-04-17 11:22:22 -0700843 })
844
845 return regressions, nil
846}
847
848// badnessLevels maps test status to a badness level
849// Higher values indicate worse status (more severe issues)
850var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000851 testStatusBuildFail: 5, // Worst
852 testStatusFail: 4,
853 testStatusSkip: 3,
854 testStatusNoTests: 2,
Earl Lee2e463fb2025-04-17 11:22:22 -0700855 testStatusPass: 1,
856 testStatusUnknown: 0, // Least bad - avoids false positives
857}
858
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000859// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
860type regressionKey struct {
861 before, after testStatus
862}
863
864var regressionMessages = map[regressionKey]string{
865 {testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
866 {testStatusUnknown, testStatusFail}: "New test is failing",
867 {testStatusUnknown, testStatusSkip}: "New test is skipped",
868 {testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
869 {testStatusPass, testStatusFail}: "Was passing, now failing",
870 {testStatusPass, testStatusSkip}: "Was passing, now skipped",
871 {testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
872 {testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
873 {testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
874 {testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
875 {testStatusSkip, testStatusFail}: "Was skipped, now failing",
876 {testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
877}
878
Earl Lee2e463fb2025-04-17 11:22:22 -0700879// isRegression determines if a test has regressed based on before and after status
880// A regression is defined as an increase in badness level
881func isRegression(before, after testStatus) bool {
882 // Higher badness level means worse status
883 return badnessLevels[after] > badnessLevels[before]
884}
885
886// formatTestRegressions generates a human-readable summary of test regressions
887func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
888 if len(regressions) == 0 {
889 return ""
890 }
891
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000892 buf := new(strings.Builder)
Philip Zeyliger49edc922025-05-14 09:45:45 -0700893 fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee2e463fb2025-04-17 11:22:22 -0700894
895 for i, reg := range regressions {
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000896 fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
897 key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
898 message, exists := regressionMessages[key]
899 if !exists {
900 message = "Regression detected"
Earl Lee2e463fb2025-04-17 11:22:22 -0700901 }
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000902 fmt.Fprintf(buf, "%s\n", message)
Earl Lee2e463fb2025-04-17 11:22:22 -0700903 }
904
Josh Bleecher Snyder833a0f82025-04-24 18:39:36 +0000905 return buf.String()
Earl Lee2e463fb2025-04-17 11:22:22 -0700906}
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +0000907
908// RelatedFile represents a file historically related to the changed files
909type RelatedFile struct {
910 Path string // Path to the file
911 Correlation float64 // Correlation score (0.0-1.0)
912}
913
914// findRelatedFiles identifies files that are historically related to the changed files
915// by analyzing git commit history for co-occurrences.
916func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
917 commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
918 if err != nil {
919 return nil, fmt.Errorf("failed to get commits touching files: %w", err)
920 }
921 if len(commits) == 0 {
922 return nil, nil
923 }
924
925 relChanged := make(map[string]bool, len(changedFiles))
926 for _, file := range changedFiles {
927 rel, err := filepath.Rel(r.repoRoot, file)
928 if err != nil {
929 return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
930 }
931 relChanged[rel] = true
932 }
933
934 historyFiles := make(map[string]int)
935 var historyMu sync.Mutex
936
937 maxWorkers := runtime.GOMAXPROCS(0)
938 semaphore := make(chan bool, maxWorkers)
939 var wg sync.WaitGroup
940
941 for _, commit := range commits {
942 wg.Add(1)
943 semaphore <- true // acquire
944
945 go func(commit string) {
946 defer wg.Done()
947 defer func() { <-semaphore }() // release
948 commitFiles, err := r.getFilesInCommit(ctx, commit)
949 if err != nil {
950 slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
951 return
952 }
953 incr := 0
954 for _, file := range commitFiles {
955 if relChanged[file] {
956 incr++
957 }
958 }
959 if incr == 0 {
960 return
961 }
962 historyMu.Lock()
963 defer historyMu.Unlock()
964 for _, file := range commitFiles {
965 historyFiles[file] += incr
966 }
967 }(commit)
968 }
969 wg.Wait()
970
971 // normalize
972 maxCount := 0
973 for _, count := range historyFiles {
974 maxCount = max(maxCount, count)
975 }
976 if maxCount == 0 {
977 return nil, nil
978 }
979
980 var relatedFiles []RelatedFile
981 for file, count := range historyFiles {
982 if relChanged[file] {
983 // Don't include inputs in the output.
984 continue
985 }
986 correlation := float64(count) / float64(maxCount)
987 // Require min correlation to avoid noise
988 if correlation >= 0.1 {
989 relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
990 }
991 }
992
993 // Highest correlation first
994 slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
995 return -1 * cmp.Compare(a.Correlation, b.Correlation)
996 })
997
998 // Limit to 1 correlated file per input file.
999 // (Arbitrary limit, to be adjusted.)
1000 maxFiles := len(changedFiles)
1001 if len(relatedFiles) > maxFiles {
1002 relatedFiles = relatedFiles[:maxFiles]
1003 }
1004
1005 // TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
1006 // and then increase the strength of the wording in the relatedFiles message.
1007
1008 return relatedFiles, nil
1009}
1010
1011// getCommitsTouchingFiles returns all commits that touch any of the specified files
1012func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
1013 if len(files) == 0 {
1014 return nil, nil
1015 }
Josh Bleecher Snyderd4b1cc72025-04-29 13:49:18 +00001016 fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyderffecb1e2025-04-28 18:59:14 +00001017 cmd := exec.CommandContext(ctx, "git", fileArgs...)
1018 cmd.Dir = r.repoRoot
1019 out, err := cmd.CombinedOutput()
1020 if err != nil {
1021 return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
1022 }
1023 return nonEmptyTrimmedLines(out), nil
1024}
1025
1026// getFilesInCommit returns all files changed in a specific commit
1027func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
1028 cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
1029 cmd.Dir = r.repoRoot
1030 out, err := cmd.CombinedOutput()
1031 if err != nil {
1032 return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
1033 }
1034 return nonEmptyTrimmedLines(out), nil
1035}
1036
1037func nonEmptyTrimmedLines(b []byte) []string {
1038 var lines []string
1039 for line := range strings.Lines(string(b)) {
1040 line = strings.TrimSpace(line)
1041 if line != "" {
1042 lines = append(lines, line)
1043 }
1044 }
1045 return lines
1046}
1047
1048// formatRelatedFiles formats the related files list into a human-readable message
1049func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
1050 if len(files) == 0 {
1051 return ""
1052 }
1053
1054 buf := new(strings.Builder)
1055
1056 fmt.Fprintf(buf, "Potentially related files:\n\n")
1057
1058 for _, file := range files {
1059 fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
1060 }
1061
1062 fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
1063 return buf.String()
1064}