Blame - claudetool/codereview/differential.go - sketch

blob: 6aa8dfe7dc600d57b653dc60a701190a034b867a [file] [log] [blame]

Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame^]	1	package codereview
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	2
				3	import (
				4	"bytes"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	5	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"context"
				7	"encoding/json"
				8	"fmt"
				9	"io"
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	10	"io/fs"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	11	"log/slog"
				12	"maps"
				13	"os"
				14	"os/exec"
				15	"path/filepath"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	16	"runtime"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	17	"slices"
				18	"strings"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	19	"sync"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	20	"time"
				21
				22	"golang.org/x/tools/go/packages"
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	23	"sketch.dev/llm"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	24	)
				25
				26	// This file does differential quality analysis of a commit relative to a base commit.
				27
				28	// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	29	func (r CodeReviewer) Tool() llm.Tool {
				30	spec := &llm.Tool{
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	31	Name: "codereview",
				32	Description: `Run an automated code review.`,
				33	// If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	34	InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	35	Run: r.Run,
				36	}
				37	return spec
				38	}
				39
				40	func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	41	// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
				42	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	43	if err := r.RequireNormalGitState(ctx); err != nil {
				44	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
				45	return "", err
				46	}
				47	if err := r.RequireNoUncommittedChanges(ctx); err != nil {
				48	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
				49	return "", err
				50	}
				51
				52	// Check that the current commit is not the initial commit
				53	currentCommit, err := r.CurrentCommit(ctx)
				54	if err != nil {
				55	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
				56	return "", err
				57	}
				58	if r.IsInitialCommit(currentCommit) {
				59	slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
				60	return "", fmt.Errorf("no new commits have been added, nothing to review")
				61	}
				62
				63	// No matter what failures happen from here out, we will declare this to have been reviewed.
				64	// This should help avoid the model getting blocked by a broken code review tool.
				65	r.reviewed = append(r.reviewed, currentCommit)
				66
				67	changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
				68	if err != nil {
				69	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
				70	return "", err
				71	}
				72
				73	// Prepare to analyze before/after for the impacted files.
				74	// We use the current commit to determine what packages exist and are impacted.
				75	// The packages in the initial commit may be different.
				76	// Good enough for now.
				77	// TODO: do better
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	78	allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	79	if err != nil {
				80	// TODO: log and skip to stuff that doesn't require packages
				81	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
				82	return "", err
				83	}
				84	allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	85
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	86	var errorMessages []string // problems we want the model to address
				87	var infoMessages []string // info the model should consider
				88
				89	// Find potentially related files that should also be considered
				90	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
				91	relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
				92	if err != nil {
				93	slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
				94	} else {
				95	relatedMsg := r.formatRelatedFiles(relatedFiles)
				96	if relatedMsg != "" {
				97	infoMessages = append(infoMessages, relatedMsg)
				98	}
				99	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	100
				101	testMsg, err := r.checkTests(ctx, allPkgList)
				102	if err != nil {
				103	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
				104	return "", err
				105	}
				106	if testMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	107	errorMessages = append(errorMessages, testMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	108	}
				109
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	110	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	111	if err != nil {
				112	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
				113	return "", err
				114	}
				115	if goplsMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	116	errorMessages = append(errorMessages, goplsMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	117	}
				118
Josh Bleecher Snyder	e2518e5	2025-04-29 11:13:40 -0700	[diff] [blame]	119	if r.llmReview {
				120	llmComments, err := r.doLLMReview(ctx)
				121	if err != nil {
				122	// Log the error but don't fail the codereview if this check fails
				123	slog.WarnContext(ctx, "CodeReviewer.Run: error doing LLM review", "err", err)
				124	}
				125	if llmComments != "" {
				126	infoMessages = append(infoMessages, llmComments)
				127	}
				128	}
				129
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	130	// NOTE: If you change this output format, update the corresponding UI parsing in:
				131	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	132	buf := new(strings.Builder)
				133	if len(infoMessages) > 0 {
				134	buf.WriteString("# Info\n\n")
				135	buf.WriteString(strings.Join(infoMessages, "\n\n"))
				136	buf.WriteString("\n\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	137	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	138	if len(errorMessages) > 0 {
				139	buf.WriteString("# Errors\n\n")
				140	buf.WriteString(strings.Join(errorMessages, "\n\n"))
				141	buf.WriteString("\n\nPlease fix before proceeding.\n")
				142	}
				143	if buf.Len() == 0 {
				144	buf.WriteString("OK")
				145	}
				146	return buf.String(), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	147	}
				148
				149	func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
				150	if r.initialWorktree != "" {
				151	return nil
				152	}
				153	tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
				154	if err != nil {
				155	return err
				156	}
				157	worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
				158	worktreeCmd.Dir = r.repoRoot
				159	out, err := worktreeCmd.CombinedOutput()
				160	if err != nil {
				161	return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
				162	}
				163	r.initialWorktree = tmpDir
				164	return nil
				165	}
				166
				167	func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
				168	goTestArgs := []string{"test", "-json", "-v"}
				169	goTestArgs = append(goTestArgs, pkgList...)
				170
				171	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				172	afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	173	afterTestOut, _ := afterTestCmd.Output()
				174	// unfortunately, we can't short-circuit here even if all tests pass,
				175	// because we need to check for skipped tests.
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	176
				177	err := r.initializeInitialCommitWorktree(ctx)
				178	if err != nil {
				179	return "", err
				180	}
				181
				182	beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				183	beforeTestCmd.Dir = r.initialWorktree
				184	beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
				185
				186	// Parse the jsonl test results
				187	beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
				188	if beforeParseErr != nil {
				189	return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
				190	}
				191	afterResults, afterParseErr := parseTestResults(afterTestOut)
				192	if afterParseErr != nil {
				193	return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
				194	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	195	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
				196	if err != nil {
				197	return "", fmt.Errorf("failed to compare test results: %w", err)
				198	}
				199	// TODO: better output formatting?
				200	res := r.formatTestRegressions(testRegressions)
				201	return res, nil
				202	}
				203
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	204	// GoplsIssue represents a single issue reported by gopls check
				205	type GoplsIssue struct {
				206	Position string // File position in format "file:line:col-range"
				207	Message string // Description of the issue
				208	}
				209
				210	// checkGopls runs gopls check on the provided files in both the current and initial state,
				211	// compares the results, and reports any new issues introduced in the current state.
				212	func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
				213	if len(changedFiles) == 0 {
				214	return "", nil // no files to check
				215	}
				216
				217	// Filter out non-Go files as gopls only works on Go files
				218	// and verify they still exist (not deleted)
				219	var goFiles []string
				220	for _, file := range changedFiles {
				221	if !strings.HasSuffix(file, ".go") {
				222	continue // not a Go file
				223	}
				224
				225	// Check if the file still exists (not deleted)
				226	if _, err := os.Stat(file); os.IsNotExist(err) {
				227	continue // file doesn't exist anymore (deleted)
				228	}
				229
				230	goFiles = append(goFiles, file)
				231	}
				232
				233	if len(goFiles) == 0 {
				234	return "", nil // no Go files to check
				235	}
				236
				237	// Run gopls check on the current state
				238	goplsArgs := append([]string{"check"}, goFiles...)
				239
				240	afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
				241	afterGoplsCmd.Dir = r.repoRoot
				242	afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
				243	if err != nil {
				244	// Check if the output looks like real gopls issues or if it's just error output
				245	if !looksLikeGoplsIssues(afterGoplsOut) {
				246	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
				247	return "", nil // Skip rather than failing the entire code review
				248	}
				249	// Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
				250	}
				251
				252	// Parse the output
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	253	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	254
				255	// If no issues were found, we're done
				256	if len(afterIssues) == 0 {
				257	return "", nil
				258	}
				259
				260	// Gopls detected issues in the current state, check if they existed in the initial state
				261	initErr := r.initializeInitialCommitWorktree(ctx)
				262	if initErr != nil {
				263	return "", err
				264	}
				265
				266	// For each file that exists in the initial commit, run gopls check
				267	var initialFilesToCheck []string
				268	for _, file := range goFiles {
				269	// Get relative path for git operations
				270	relFile, err := filepath.Rel(r.repoRoot, file)
				271	if err != nil {
				272	slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
				273	continue
				274	}
				275
				276	// Check if the file exists in the initial commit
				277	checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
				278	checkCmd.Dir = r.repoRoot
				279	if err := checkCmd.Run(); err == nil {
				280	// File exists in initial commit
				281	initialFilePath := filepath.Join(r.initialWorktree, relFile)
				282	initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
				283	}
				284	}
				285
				286	// Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	287	var beforeIssues []GoplsIssue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	288	if len(initialFilesToCheck) > 0 {
				289	beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
				290	beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
				291	beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	292	beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	293	if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
				294	// If gopls fails to run properly on the initial commit, log a warning and continue
				295	// with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	296	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	297	} else {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	298	beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	299	}
				300	}
				301
				302	// Find new issues that weren't present in the initial state
				303	goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
				304	if len(goplsRegressions) == 0 {
				305	return "", nil // no new issues
				306	}
				307
				308	// Format the results
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	309	return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	310	}
				311
				312	// parseGoplsOutput parses the text output from gopls check
				313	// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	314	func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	315	var issues []GoplsIssue
Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame^]	316	for line := range strings.Lines(string(output)) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	317	line = strings.TrimSpace(line)
				318	if line == "" {
				319	continue
				320	}
				321
				322	// Skip lines that look like error messages rather than gopls issues
				323	if strings.HasPrefix(line, "Error:") \|\|
				324	strings.HasPrefix(line, "Failed:") \|\|
				325	strings.HasPrefix(line, "Warning:") \|\|
				326	strings.HasPrefix(line, "gopls:") {
				327	continue
				328	}
				329
				330	// Find the first colon that separates the file path from the line number
				331	firstColonIdx := strings.Index(line, ":")
				332	if firstColonIdx < 0 {
				333	continue // Invalid format
				334	}
				335
				336	// Verify the part before the first colon looks like a file path
				337	potentialPath := line[:firstColonIdx]
				338	if !strings.HasSuffix(potentialPath, ".go") {
				339	continue // Not a Go file path
				340	}
				341
				342	// Find the position of the first message separator ': '
				343	// This separates the position info from the message
				344	messageStart := strings.Index(line, ": ")
				345	if messageStart < 0 \|\| messageStart <= firstColonIdx {
				346	continue // Invalid format
				347	}
				348
				349	// Extract position and message
				350	position := line[:messageStart]
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	351	rel, err := filepath.Rel(root, position)
				352	if err == nil {
				353	position = rel
				354	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	355	message := line[messageStart+2:] // Skip the ': ' separator
				356
				357	// Verify position has the expected format (at least 2 colons for line:col)
				358	colonCount := strings.Count(position, ":")
				359	if colonCount < 2 {
				360	continue // Not enough position information
				361	}
				362
				363	issues = append(issues, GoplsIssue{
				364	Position: position,
				365	Message: message,
				366	})
				367	}
				368
				369	return issues
				370	}
				371
				372	// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
				373	// rather than error messages about gopls itself failing
				374	func looksLikeGoplsIssues(output []byte) bool {
				375	// If output is empty, it's not valid issues
				376	if len(output) == 0 {
				377	return false
				378	}
				379
				380	// Check if output has at least one line that looks like a gopls issue
				381	// A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame^]	382	for line := range strings.Lines(string(output)) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	383	line = strings.TrimSpace(line)
				384	if line == "" {
				385	continue
				386	}
				387
				388	// A gopls issue has at least two colons (file path, line number, column)
				389	// and contains a colon followed by a space (separating position from message)
				390	colonCount := strings.Count(line, ":")
				391	hasSeparator := strings.Contains(line, ": ")
				392
				393	if colonCount >= 2 && hasSeparator {
				394	// Check if it starts with a likely file path (ending in .go)
				395	parts := strings.SplitN(line, ":", 2)
				396	if strings.HasSuffix(parts[0], ".go") {
				397	return true
				398	}
				399	}
				400	}
				401	return false
				402	}
				403
				404	// normalizeGoplsPosition extracts just the file path from a position string
				405	func normalizeGoplsPosition(position string) string {
				406	// Extract just the file path by taking everything before the first colon
				407	parts := strings.Split(position, ":")
				408	if len(parts) < 1 {
				409	return position
				410	}
				411	return parts[0]
				412	}
				413
				414	// findGoplsRegressions identifies gopls issues that are new in the after state
				415	func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
				416	var regressions []GoplsIssue
				417
				418	// Build map of before issues for easier lookup
				419	beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
				420	for _, issue := range before {
				421	file := normalizeGoplsPosition(issue.Position)
				422	if _, exists := beforeIssueMap[file]; !exists {
				423	beforeIssueMap[file] = make(map[string]bool)
				424	}
				425	// Store both the exact message and the general issue type for fuzzy matching
				426	beforeIssueMap[file][issue.Message] = true
				427
				428	// Extract the general issue type (everything before the first ':' in the message)
				429	generalIssue := issue.Message
				430	if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
				431	generalIssue = issue.Message[:colonIdx]
				432	}
				433	beforeIssueMap[file][generalIssue] = true
				434	}
				435
				436	// Check each after issue to see if it's new
				437	for _, afterIssue := range after {
				438	file := normalizeGoplsPosition(afterIssue.Position)
				439	isNew := true
				440
				441	if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
				442	// Check for exact message match
				443	if fileIssues[afterIssue.Message] {
				444	isNew = false
				445	} else {
				446	// Check for general issue type match
				447	generalIssue := afterIssue.Message
				448	if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
				449	generalIssue = afterIssue.Message[:colonIdx]
				450	}
				451	if fileIssues[generalIssue] {
				452	isNew = false
				453	}
				454	}
				455	}
				456
				457	if isNew {
				458	regressions = append(regressions, afterIssue)
				459	}
				460	}
				461
				462	// Sort regressions for deterministic output
				463	slices.SortFunc(regressions, func(a, b GoplsIssue) int {
				464	return strings.Compare(a.Position, b.Position)
				465	})
				466
				467	return regressions
				468	}
				469
				470	// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	471	func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	472	if len(regressions) == 0 {
				473	return ""
				474	}
				475
				476	var sb strings.Builder
				477	sb.WriteString("Gopls check issues detected:\n\n")
				478
				479	// Format each issue
				480	for i, issue := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	481	sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	482	}
				483
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	484	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
				485	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	486	return sb.String()
				487	}
				488
				489	func (r *CodeReviewer) HasReviewed(commit string) bool {
				490	return slices.Contains(r.reviewed, commit)
				491	}
				492
				493	func (r *CodeReviewer) IsInitialCommit(commit string) bool {
				494	return commit == r.initialCommit
				495	}
				496
				497	// packagesForFiles returns maps of packages related to the given files:
				498	// 1. directPkgs: packages that directly contain the changed files
				499	// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
				500	// It may include false positives.
				501	// Files must be absolute paths!
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	502	func (r CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]packages.Package, err error) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	503	for _, f := range files {
				504	if !filepath.IsAbs(f) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	505	return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	506	}
				507	}
				508	cfg := &packages.Config{
				509	Mode: packages.LoadImports \| packages.NeedEmbedFiles,
				510	Context: ctx,
				511	// Logf: func(msg string, args ...any) {
				512	// slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
				513	// },
				514	// TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
				515	// We can cross that bridge when we get there.
				516	Dir: r.repoRoot,
				517	Tests: true,
				518	}
				519	universe, err := packages.Load(cfg, "./...")
				520	if err != nil {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	521	return nil, err
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	522	}
				523	// Identify packages that directly contain the changed files
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	524	directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	525	for _, pkg := range universe {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	526	pkgFiles := allFiles(pkg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	527	for _, file := range files {
				528	if pkgFiles[file] {
				529	// prefer test packages, as they contain strictly more files (right?)
				530	prev := directPkgs[pkg.PkgPath]
				531	if prev == nil \|\| prev.ForTest == "" {
				532	directPkgs[pkg.PkgPath] = pkg
				533	}
				534	}
				535	}
				536	}
				537
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	538	allPkgs = maps.Clone(directPkgs)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	539
				540	// Add packages that depend on the direct packages
				541	addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	542	return allPkgs, nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	543	}
				544
				545	// allFiles returns all files that might be referenced by the package.
				546	// It may contain false positives.
				547	func allFiles(p *packages.Package) map[string]bool {
				548	files := make(map[string]bool)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	549	// Add files from package info
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	550	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
				551	for _, extra := range add {
				552	for _, file := range extra {
				553	files[file] = true
				554	}
				555	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	556	// Add files from testdata directory
				557	testdataDir := filepath.Join(p.Dir, "testdata")
				558	if _, err := os.Stat(testdataDir); err == nil {
				559	fsys := os.DirFS(p.Dir)
				560	fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
				561	if err == nil && !d.IsDir() {
				562	files[filepath.Join(p.Dir, path)] = true
				563	}
				564	return nil
				565	})
				566	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	567	return files
				568	}
				569
				570	// addDependentPackages adds to pkgs all packages from universe
				571	// that directly or indirectly depend on any package already in pkgs.
				572	func addDependentPackages(universe []packages.Package, pkgs map[string]packages.Package) {
				573	for {
				574	changed := false
				575	for _, p := range universe {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	576	if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
				577	// skip test packages
				578	continue
				579	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	580	if _, ok := pkgs[p.PkgPath]; ok {
				581	// already in pkgs
				582	continue
				583	}
				584	for importPath := range p.Imports {
				585	if _, ok := pkgs[importPath]; ok {
				586	// imports a package dependent on pkgs, add it
				587	pkgs[p.PkgPath] = p
				588	changed = true
				589	break
				590	}
				591	}
				592	}
				593	if !changed {
				594	break
				595	}
				596	}
				597	}
				598
				599	// testJSON is a union of BuildEvent and TestEvent
				600	type testJSON struct {
				601	// TestEvent only:
				602	// The Time field holds the time the event happened. It is conventionally omitted
				603	// for cached test results.
				604	Time time.Time `json:"Time"`
				605	// BuildEvent only:
				606	// The ImportPath field gives the package ID of the package being built.
				607	// This matches the Package.ImportPath field of go list -json and the
				608	// TestEvent.FailedBuild field of go test -json. Note that it does not
				609	// match TestEvent.Package.
				610	ImportPath string `json:"ImportPath"` // BuildEvent only
				611	// TestEvent only:
				612	// The Package field, if present, specifies the package being tested. When the
				613	// go command runs parallel tests in -json mode, events from different tests are
				614	// interlaced; the Package field allows readers to separate them.
				615	Package string `json:"Package"`
				616	// Action is used in both BuildEvent and TestEvent.
				617	// It is the key to distinguishing between them.
				618	// BuildEvent:
				619	// build-output or build-fail
				620	// TestEvent:
				621	// start, run, pause, cont, pass, bench, fail, output, skip
				622	Action string `json:"Action"`
				623	// TestEvent only:
				624	// The Test field, if present, specifies the test, example, or benchmark function
				625	// that caused the event. Events for the overall package test do not set Test.
				626	Test string `json:"Test"`
				627	// TestEvent only:
				628	// The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
				629	// for the specific test or the overall package test that passed or failed.
				630	Elapsed float64
				631	// TestEvent:
				632	// The Output field is set for Action == "output" and is a portion of the
				633	// test's output (standard output and standard error merged together). The
				634	// output is unmodified except that invalid UTF-8 output from a test is coerced
				635	// into valid UTF-8 by use of replacement characters. With that one exception,
				636	// the concatenation of the Output fields of all output events is the exact output
				637	// of the test execution.
				638	// BuildEvent:
				639	// The Output field is set for Action == "build-output" and is a portion of
				640	// the build's output. The concatenation of the Output fields of all output
				641	// events is the exact output of the build. A single event may contain one
				642	// or more lines of output and there may be more than one output event for
				643	// a given ImportPath. This matches the definition of the TestEvent.Output
				644	// field produced by go test -json.
				645	Output string `json:"Output"`
				646	// TestEvent only:
				647	// The FailedBuild field is set for Action == "fail" if the test failure was caused
				648	// by a build failure. It contains the package ID of the package that failed to
				649	// build. This matches the ImportPath field of the "go list" output, as well as the
				650	// BuildEvent.ImportPath field as emitted by "go build -json".
				651	FailedBuild string `json:"FailedBuild"`
				652	}
				653
				654	// parseTestResults converts test output in JSONL format into a slice of testJSON objects
				655	func parseTestResults(testOutput []byte) ([]testJSON, error) {
				656	var results []testJSON
				657	dec := json.NewDecoder(bytes.NewReader(testOutput))
				658	for {
				659	var event testJSON
				660	if err := dec.Decode(&event); err != nil {
				661	if err == io.EOF {
				662	break
				663	}
				664	return nil, err
				665	}
				666	results = append(results, event)
				667	}
				668	return results, nil
				669	}
				670
				671	// testStatus represents the status of a test in a given commit
				672	type testStatus int
				673
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	674	//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	675	const (
				676	testStatusUnknown testStatus = iota
				677	testStatusPass
				678	testStatusFail
				679	testStatusBuildFail
				680	testStatusSkip
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	681	testStatusNoTests // no tests exist for this package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	682	)
				683
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	684	// testRegression represents a test that regressed between commits
				685	type testRegression struct {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	686	Package string
				687	Test string // empty for package tests
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	688	BeforeStatus testStatus
				689	AfterStatus testStatus
				690	Output string // failure output in the after state
				691	}
				692
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	693	func (r *testRegression) Source() string {
				694	if r.Test == "" {
				695	return r.Package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	696	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	697	return fmt.Sprintf("%s.%s", r.Package, r.Test)
				698	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	699
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	700	type packageResult struct {
				701	Status testStatus // overall status for the package
				702	TestStatus map[string]testStatus // name -> status
				703	TestOutput map[string][]string // name -> output parts
				704	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	705
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	706	// collectTestStatuses processes a slice of test events and returns rich status information
				707	func collectTestStatuses(results []testJSON) map[string]*packageResult {
				708	m := make(map[string]*packageResult)
				709
				710	for _, event := range results {
				711	pkg := event.Package
				712	p, ok := m[pkg]
				713	if !ok {
				714	p = new(packageResult)
				715	p.TestStatus = make(map[string]testStatus)
				716	p.TestOutput = make(map[string][]string)
				717	m[pkg] = p
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	718	}
				719
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	720	switch event.Action {
				721	case "output":
				722	p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	723	continue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	724	case "pass":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	725	if event.Test == "" {
				726	p.Status = testStatusPass
				727	} else {
				728	p.TestStatus[event.Test] = testStatusPass
				729	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	730	case "fail":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	731	if event.Test == "" {
				732	if event.FailedBuild != "" {
				733	p.Status = testStatusBuildFail
				734	} else {
				735	p.Status = testStatusFail
				736	}
				737	} else {
				738	p.TestStatus[event.Test] = testStatusFail
				739	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	740	case "skip":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	741	if event.Test == "" {
				742	p.Status = testStatusNoTests
				743	} else {
				744	p.TestStatus[event.Test] = testStatusSkip
				745	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	746	}
				747	}
				748
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	749	return m
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	750	}
				751
				752	// compareTestResults identifies tests that have regressed between commits
				753	func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	754	before := collectTestStatuses(beforeResults)
				755	after := collectTestStatuses(afterResults)
				756	var testLevelRegressions []testRegression
				757	var packageLevelRegressions []testRegression
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	758
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	759	afterPkgs := slices.Sorted(maps.Keys(after))
				760	for _, pkg := range afterPkgs {
				761	afterResult := after[pkg]
				762	afterStatus := afterResult.Status
				763	// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
				764	beforeResult, ok := before[pkg]
				765	beforeStatus := testStatusNoTests
				766	if ok {
				767	beforeStatus = beforeResult.Status
				768	}
				769	// If things no longer build, stop at the package level.
				770	// Otherwise, proceed to the test level, so we have more precise information.
				771	if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
				772	packageLevelRegressions = append(packageLevelRegressions, testRegression{
				773	Package: pkg,
				774	BeforeStatus: beforeStatus,
				775	AfterStatus: afterStatus,
				776	})
				777	continue
				778	}
				779	tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
				780	for _, test := range tests {
				781	afterStatus := afterResult.TestStatus[test]
				782	switch afterStatus {
				783	case testStatusPass:
				784	continue
				785	case testStatusUnknown:
				786	slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
				787	continue
				788	}
				789	beforeStatus := beforeResult.TestStatus[test]
				790	if isRegression(beforeStatus, afterStatus) {
				791	testLevelRegressions = append(testLevelRegressions, testRegression{
				792	Package: pkg,
				793	Test: test,
				794	BeforeStatus: beforeStatus,
				795	AfterStatus: afterStatus,
				796	Output: strings.Join(afterResult.TestOutput[test], ""),
				797	})
				798	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	799	}
				800	}
				801
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	802	// If we have test-level regressions, report only those
				803	// Otherwise, report package-level regressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	804	var regressions []testRegression
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	805	if len(testLevelRegressions) > 0 {
				806	regressions = testLevelRegressions
				807	} else {
				808	regressions = packageLevelRegressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	809	}
				810
				811	// Sort regressions for consistent output
				812	slices.SortFunc(regressions, func(a, b testRegression) int {
				813	// First by package
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	814	if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	815	return c
				816	}
				817	// Then by test name
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	818	return strings.Compare(a.Test, b.Test)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	819	})
				820
				821	return regressions, nil
				822	}
				823
				824	// badnessLevels maps test status to a badness level
				825	// Higher values indicate worse status (more severe issues)
				826	var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	827	testStatusBuildFail: 5, // Worst
				828	testStatusFail: 4,
				829	testStatusSkip: 3,
				830	testStatusNoTests: 2,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	831	testStatusPass: 1,
				832	testStatusUnknown: 0, // Least bad - avoids false positives
				833	}
				834
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	835	// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
				836	type regressionKey struct {
				837	before, after testStatus
				838	}
				839
				840	var regressionMessages = map[regressionKey]string{
				841	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
				842	{testStatusUnknown, testStatusFail}: "New test is failing",
				843	{testStatusUnknown, testStatusSkip}: "New test is skipped",
				844	{testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
				845	{testStatusPass, testStatusFail}: "Was passing, now failing",
				846	{testStatusPass, testStatusSkip}: "Was passing, now skipped",
				847	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
				848	{testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
				849	{testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
				850	{testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
				851	{testStatusSkip, testStatusFail}: "Was skipped, now failing",
				852	{testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
				853	}
				854
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	855	// isRegression determines if a test has regressed based on before and after status
				856	// A regression is defined as an increase in badness level
				857	func isRegression(before, after testStatus) bool {
				858	// Higher badness level means worse status
				859	return badnessLevels[after] > badnessLevels[before]
				860	}
				861
				862	// formatTestRegressions generates a human-readable summary of test regressions
				863	func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
				864	if len(regressions) == 0 {
				865	return ""
				866	}
				867
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	868	buf := new(strings.Builder)
				869	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	870
				871	for i, reg := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	872	fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
				873	key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
				874	message, exists := regressionMessages[key]
				875	if !exists {
				876	message = "Regression detected"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	877	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	878	fmt.Fprintf(buf, "%s\n", message)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	879	}
				880
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	881	return buf.String()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	882	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	883
				884	// RelatedFile represents a file historically related to the changed files
				885	type RelatedFile struct {
				886	Path string // Path to the file
				887	Correlation float64 // Correlation score (0.0-1.0)
				888	}
				889
				890	// findRelatedFiles identifies files that are historically related to the changed files
				891	// by analyzing git commit history for co-occurrences.
				892	func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
				893	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
				894	if err != nil {
				895	return nil, fmt.Errorf("failed to get commits touching files: %w", err)
				896	}
				897	if len(commits) == 0 {
				898	return nil, nil
				899	}
				900
				901	relChanged := make(map[string]bool, len(changedFiles))
				902	for _, file := range changedFiles {
				903	rel, err := filepath.Rel(r.repoRoot, file)
				904	if err != nil {
				905	return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
				906	}
				907	relChanged[rel] = true
				908	}
				909
				910	historyFiles := make(map[string]int)
				911	var historyMu sync.Mutex
				912
				913	maxWorkers := runtime.GOMAXPROCS(0)
				914	semaphore := make(chan bool, maxWorkers)
				915	var wg sync.WaitGroup
				916
				917	for _, commit := range commits {
				918	wg.Add(1)
				919	semaphore <- true // acquire
				920
				921	go func(commit string) {
				922	defer wg.Done()
				923	defer func() { <-semaphore }() // release
				924	commitFiles, err := r.getFilesInCommit(ctx, commit)
				925	if err != nil {
				926	slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
				927	return
				928	}
				929	incr := 0
				930	for _, file := range commitFiles {
				931	if relChanged[file] {
				932	incr++
				933	}
				934	}
				935	if incr == 0 {
				936	return
				937	}
				938	historyMu.Lock()
				939	defer historyMu.Unlock()
				940	for _, file := range commitFiles {
				941	historyFiles[file] += incr
				942	}
				943	}(commit)
				944	}
				945	wg.Wait()
				946
				947	// normalize
				948	maxCount := 0
				949	for _, count := range historyFiles {
				950	maxCount = max(maxCount, count)
				951	}
				952	if maxCount == 0 {
				953	return nil, nil
				954	}
				955
				956	var relatedFiles []RelatedFile
				957	for file, count := range historyFiles {
				958	if relChanged[file] {
				959	// Don't include inputs in the output.
				960	continue
				961	}
				962	correlation := float64(count) / float64(maxCount)
				963	// Require min correlation to avoid noise
				964	if correlation >= 0.1 {
				965	relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
				966	}
				967	}
				968
				969	// Highest correlation first
				970	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
				971	return -1 * cmp.Compare(a.Correlation, b.Correlation)
				972	})
				973
				974	// Limit to 1 correlated file per input file.
				975	// (Arbitrary limit, to be adjusted.)
				976	maxFiles := len(changedFiles)
				977	if len(relatedFiles) > maxFiles {
				978	relatedFiles = relatedFiles[:maxFiles]
				979	}
				980
				981	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
				982	// and then increase the strength of the wording in the relatedFiles message.
				983
				984	return relatedFiles, nil
				985	}
				986
				987	// getCommitsTouchingFiles returns all commits that touch any of the specified files
				988	func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
				989	if len(files) == 0 {
				990	return nil, nil
				991	}
Josh Bleecher Snyder	d4b1cc7	2025-04-29 13:49:18 +0000	[diff] [blame]	992	fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	993	cmd := exec.CommandContext(ctx, "git", fileArgs...)
				994	cmd.Dir = r.repoRoot
				995	out, err := cmd.CombinedOutput()
				996	if err != nil {
				997	return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
				998	}
				999	return nonEmptyTrimmedLines(out), nil
				1000	}
				1001
				1002	// getFilesInCommit returns all files changed in a specific commit
				1003	func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
				1004	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
				1005	cmd.Dir = r.repoRoot
				1006	out, err := cmd.CombinedOutput()
				1007	if err != nil {
				1008	return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
				1009	}
				1010	return nonEmptyTrimmedLines(out), nil
				1011	}
				1012
				1013	func nonEmptyTrimmedLines(b []byte) []string {
				1014	var lines []string
				1015	for line := range strings.Lines(string(b)) {
				1016	line = strings.TrimSpace(line)
				1017	if line != "" {
				1018	lines = append(lines, line)
				1019	}
				1020	}
				1021	return lines
				1022	}
				1023
				1024	// formatRelatedFiles formats the related files list into a human-readable message
				1025	func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
				1026	if len(files) == 0 {
				1027	return ""
				1028	}
				1029
				1030	buf := new(strings.Builder)
				1031
				1032	fmt.Fprintf(buf, "Potentially related files:\n\n")
				1033
				1034	for _, file := range files {
				1035	fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
				1036	}
				1037
				1038	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
				1039	return buf.String()
				1040	}