Blame - claudetool/differential.go - sketch

blob: a6b3413261b2ce82733ab1cff168a8b986313c2b [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package claudetool
				2
				3	import (
				4	"bytes"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	5	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"context"
				7	"encoding/json"
				8	"fmt"
				9	"io"
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	10	"io/fs"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	11	"log/slog"
				12	"maps"
				13	"os"
				14	"os/exec"
				15	"path/filepath"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	16	"runtime"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	17	"slices"
				18	"strings"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	19	"sync"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	20	"time"
				21
				22	"golang.org/x/tools/go/packages"
				23	"sketch.dev/ant"
				24	)
				25
				26	// This file does differential quality analysis of a commit relative to a base commit.
				27
				28	// Tool returns a tool spec for a CodeReview tool backed by r.
				29	func (r CodeReviewer) Tool() ant.Tool {
				30	spec := &ant.Tool{
				31	Name: "codereview",
				32	Description: `Run an automated code review.`,
				33	// If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder	2b2090e	2025-04-29 16:41:34 -0700	[diff] [blame]	34	InputSchema: ant.MustSchema(`{"type": "object", "properties": {}}`),
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	35	Run: r.Run,
				36	}
				37	return spec
				38	}
				39
				40	func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	41	// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
				42	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	43	if err := r.RequireNormalGitState(ctx); err != nil {
				44	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
				45	return "", err
				46	}
				47	if err := r.RequireNoUncommittedChanges(ctx); err != nil {
				48	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
				49	return "", err
				50	}
				51
				52	// Check that the current commit is not the initial commit
				53	currentCommit, err := r.CurrentCommit(ctx)
				54	if err != nil {
				55	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
				56	return "", err
				57	}
				58	if r.IsInitialCommit(currentCommit) {
				59	slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
				60	return "", fmt.Errorf("no new commits have been added, nothing to review")
				61	}
				62
				63	// No matter what failures happen from here out, we will declare this to have been reviewed.
				64	// This should help avoid the model getting blocked by a broken code review tool.
				65	r.reviewed = append(r.reviewed, currentCommit)
				66
				67	changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
				68	if err != nil {
				69	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
				70	return "", err
				71	}
				72
				73	// Prepare to analyze before/after for the impacted files.
				74	// We use the current commit to determine what packages exist and are impacted.
				75	// The packages in the initial commit may be different.
				76	// Good enough for now.
				77	// TODO: do better
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	78	allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	79	if err != nil {
				80	// TODO: log and skip to stuff that doesn't require packages
				81	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
				82	return "", err
				83	}
				84	allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	85
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	86	var errorMessages []string // problems we want the model to address
				87	var infoMessages []string // info the model should consider
				88
				89	// Find potentially related files that should also be considered
				90	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
				91	relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
				92	if err != nil {
				93	slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
				94	} else {
				95	relatedMsg := r.formatRelatedFiles(relatedFiles)
				96	if relatedMsg != "" {
				97	infoMessages = append(infoMessages, relatedMsg)
				98	}
				99	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	100
				101	testMsg, err := r.checkTests(ctx, allPkgList)
				102	if err != nil {
				103	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
				104	return "", err
				105	}
				106	if testMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	107	errorMessages = append(errorMessages, testMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	108	}
				109
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	110	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	111	if err != nil {
				112	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
				113	return "", err
				114	}
				115	if goplsMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	116	errorMessages = append(errorMessages, goplsMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	117	}
				118
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	119	// NOTE: If you change this output format, update the corresponding UI parsing in:
				120	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	121	buf := new(strings.Builder)
				122	if len(infoMessages) > 0 {
				123	buf.WriteString("# Info\n\n")
				124	buf.WriteString(strings.Join(infoMessages, "\n\n"))
				125	buf.WriteString("\n\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	126	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	127	if len(errorMessages) > 0 {
				128	buf.WriteString("# Errors\n\n")
				129	buf.WriteString(strings.Join(errorMessages, "\n\n"))
				130	buf.WriteString("\n\nPlease fix before proceeding.\n")
				131	}
				132	if buf.Len() == 0 {
				133	buf.WriteString("OK")
				134	}
				135	return buf.String(), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	136	}
				137
				138	func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
				139	if r.initialWorktree != "" {
				140	return nil
				141	}
				142	tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
				143	if err != nil {
				144	return err
				145	}
				146	worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
				147	worktreeCmd.Dir = r.repoRoot
				148	out, err := worktreeCmd.CombinedOutput()
				149	if err != nil {
				150	return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
				151	}
				152	r.initialWorktree = tmpDir
				153	return nil
				154	}
				155
				156	func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
				157	goTestArgs := []string{"test", "-json", "-v"}
				158	goTestArgs = append(goTestArgs, pkgList...)
				159
				160	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				161	afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	162	afterTestOut, _ := afterTestCmd.Output()
				163	// unfortunately, we can't short-circuit here even if all tests pass,
				164	// because we need to check for skipped tests.
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	165
				166	err := r.initializeInitialCommitWorktree(ctx)
				167	if err != nil {
				168	return "", err
				169	}
				170
				171	beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				172	beforeTestCmd.Dir = r.initialWorktree
				173	beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
				174
				175	// Parse the jsonl test results
				176	beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
				177	if beforeParseErr != nil {
				178	return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
				179	}
				180	afterResults, afterParseErr := parseTestResults(afterTestOut)
				181	if afterParseErr != nil {
				182	return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
				183	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	184	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
				185	if err != nil {
				186	return "", fmt.Errorf("failed to compare test results: %w", err)
				187	}
				188	// TODO: better output formatting?
				189	res := r.formatTestRegressions(testRegressions)
				190	return res, nil
				191	}
				192
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	193	// GoplsIssue represents a single issue reported by gopls check
				194	type GoplsIssue struct {
				195	Position string // File position in format "file:line:col-range"
				196	Message string // Description of the issue
				197	}
				198
				199	// checkGopls runs gopls check on the provided files in both the current and initial state,
				200	// compares the results, and reports any new issues introduced in the current state.
				201	func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
				202	if len(changedFiles) == 0 {
				203	return "", nil // no files to check
				204	}
				205
				206	// Filter out non-Go files as gopls only works on Go files
				207	// and verify they still exist (not deleted)
				208	var goFiles []string
				209	for _, file := range changedFiles {
				210	if !strings.HasSuffix(file, ".go") {
				211	continue // not a Go file
				212	}
				213
				214	// Check if the file still exists (not deleted)
				215	if _, err := os.Stat(file); os.IsNotExist(err) {
				216	continue // file doesn't exist anymore (deleted)
				217	}
				218
				219	goFiles = append(goFiles, file)
				220	}
				221
				222	if len(goFiles) == 0 {
				223	return "", nil // no Go files to check
				224	}
				225
				226	// Run gopls check on the current state
				227	goplsArgs := append([]string{"check"}, goFiles...)
				228
				229	afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
				230	afterGoplsCmd.Dir = r.repoRoot
				231	afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
				232	if err != nil {
				233	// Check if the output looks like real gopls issues or if it's just error output
				234	if !looksLikeGoplsIssues(afterGoplsOut) {
				235	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
				236	return "", nil // Skip rather than failing the entire code review
				237	}
				238	// Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
				239	}
				240
				241	// Parse the output
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	242	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	243
				244	// If no issues were found, we're done
				245	if len(afterIssues) == 0 {
				246	return "", nil
				247	}
				248
				249	// Gopls detected issues in the current state, check if they existed in the initial state
				250	initErr := r.initializeInitialCommitWorktree(ctx)
				251	if initErr != nil {
				252	return "", err
				253	}
				254
				255	// For each file that exists in the initial commit, run gopls check
				256	var initialFilesToCheck []string
				257	for _, file := range goFiles {
				258	// Get relative path for git operations
				259	relFile, err := filepath.Rel(r.repoRoot, file)
				260	if err != nil {
				261	slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
				262	continue
				263	}
				264
				265	// Check if the file exists in the initial commit
				266	checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
				267	checkCmd.Dir = r.repoRoot
				268	if err := checkCmd.Run(); err == nil {
				269	// File exists in initial commit
				270	initialFilePath := filepath.Join(r.initialWorktree, relFile)
				271	initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
				272	}
				273	}
				274
				275	// Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	276	var beforeIssues []GoplsIssue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	277	if len(initialFilesToCheck) > 0 {
				278	beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
				279	beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
				280	beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	281	beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	282	if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
				283	// If gopls fails to run properly on the initial commit, log a warning and continue
				284	// with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	285	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	286	} else {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	287	beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	288	}
				289	}
				290
				291	// Find new issues that weren't present in the initial state
				292	goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
				293	if len(goplsRegressions) == 0 {
				294	return "", nil // no new issues
				295	}
				296
				297	// Format the results
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	298	return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	299	}
				300
				301	// parseGoplsOutput parses the text output from gopls check
				302	// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	303	func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	304	var issues []GoplsIssue
				305	lines := strings.Split(string(output), "\n")
				306
				307	for _, line := range lines {
				308	line = strings.TrimSpace(line)
				309	if line == "" {
				310	continue
				311	}
				312
				313	// Skip lines that look like error messages rather than gopls issues
				314	if strings.HasPrefix(line, "Error:") \|\|
				315	strings.HasPrefix(line, "Failed:") \|\|
				316	strings.HasPrefix(line, "Warning:") \|\|
				317	strings.HasPrefix(line, "gopls:") {
				318	continue
				319	}
				320
				321	// Find the first colon that separates the file path from the line number
				322	firstColonIdx := strings.Index(line, ":")
				323	if firstColonIdx < 0 {
				324	continue // Invalid format
				325	}
				326
				327	// Verify the part before the first colon looks like a file path
				328	potentialPath := line[:firstColonIdx]
				329	if !strings.HasSuffix(potentialPath, ".go") {
				330	continue // Not a Go file path
				331	}
				332
				333	// Find the position of the first message separator ': '
				334	// This separates the position info from the message
				335	messageStart := strings.Index(line, ": ")
				336	if messageStart < 0 \|\| messageStart <= firstColonIdx {
				337	continue // Invalid format
				338	}
				339
				340	// Extract position and message
				341	position := line[:messageStart]
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	342	rel, err := filepath.Rel(root, position)
				343	if err == nil {
				344	position = rel
				345	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	346	message := line[messageStart+2:] // Skip the ': ' separator
				347
				348	// Verify position has the expected format (at least 2 colons for line:col)
				349	colonCount := strings.Count(position, ":")
				350	if colonCount < 2 {
				351	continue // Not enough position information
				352	}
				353
				354	issues = append(issues, GoplsIssue{
				355	Position: position,
				356	Message: message,
				357	})
				358	}
				359
				360	return issues
				361	}
				362
				363	// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
				364	// rather than error messages about gopls itself failing
				365	func looksLikeGoplsIssues(output []byte) bool {
				366	// If output is empty, it's not valid issues
				367	if len(output) == 0 {
				368	return false
				369	}
				370
				371	// Check if output has at least one line that looks like a gopls issue
				372	// A gopls issue looks like: '/path/to/file.go:123:45-67: message'
				373	lines := strings.Split(string(output), "\n")
				374	for _, line := range lines {
				375	line = strings.TrimSpace(line)
				376	if line == "" {
				377	continue
				378	}
				379
				380	// A gopls issue has at least two colons (file path, line number, column)
				381	// and contains a colon followed by a space (separating position from message)
				382	colonCount := strings.Count(line, ":")
				383	hasSeparator := strings.Contains(line, ": ")
				384
				385	if colonCount >= 2 && hasSeparator {
				386	// Check if it starts with a likely file path (ending in .go)
				387	parts := strings.SplitN(line, ":", 2)
				388	if strings.HasSuffix(parts[0], ".go") {
				389	return true
				390	}
				391	}
				392	}
				393	return false
				394	}
				395
				396	// normalizeGoplsPosition extracts just the file path from a position string
				397	func normalizeGoplsPosition(position string) string {
				398	// Extract just the file path by taking everything before the first colon
				399	parts := strings.Split(position, ":")
				400	if len(parts) < 1 {
				401	return position
				402	}
				403	return parts[0]
				404	}
				405
				406	// findGoplsRegressions identifies gopls issues that are new in the after state
				407	func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
				408	var regressions []GoplsIssue
				409
				410	// Build map of before issues for easier lookup
				411	beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
				412	for _, issue := range before {
				413	file := normalizeGoplsPosition(issue.Position)
				414	if _, exists := beforeIssueMap[file]; !exists {
				415	beforeIssueMap[file] = make(map[string]bool)
				416	}
				417	// Store both the exact message and the general issue type for fuzzy matching
				418	beforeIssueMap[file][issue.Message] = true
				419
				420	// Extract the general issue type (everything before the first ':' in the message)
				421	generalIssue := issue.Message
				422	if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
				423	generalIssue = issue.Message[:colonIdx]
				424	}
				425	beforeIssueMap[file][generalIssue] = true
				426	}
				427
				428	// Check each after issue to see if it's new
				429	for _, afterIssue := range after {
				430	file := normalizeGoplsPosition(afterIssue.Position)
				431	isNew := true
				432
				433	if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
				434	// Check for exact message match
				435	if fileIssues[afterIssue.Message] {
				436	isNew = false
				437	} else {
				438	// Check for general issue type match
				439	generalIssue := afterIssue.Message
				440	if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
				441	generalIssue = afterIssue.Message[:colonIdx]
				442	}
				443	if fileIssues[generalIssue] {
				444	isNew = false
				445	}
				446	}
				447	}
				448
				449	if isNew {
				450	regressions = append(regressions, afterIssue)
				451	}
				452	}
				453
				454	// Sort regressions for deterministic output
				455	slices.SortFunc(regressions, func(a, b GoplsIssue) int {
				456	return strings.Compare(a.Position, b.Position)
				457	})
				458
				459	return regressions
				460	}
				461
				462	// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	463	func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	464	if len(regressions) == 0 {
				465	return ""
				466	}
				467
				468	var sb strings.Builder
				469	sb.WriteString("Gopls check issues detected:\n\n")
				470
				471	// Format each issue
				472	for i, issue := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	473	sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	474	}
				475
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	476	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
				477	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	478	return sb.String()
				479	}
				480
				481	func (r *CodeReviewer) HasReviewed(commit string) bool {
				482	return slices.Contains(r.reviewed, commit)
				483	}
				484
				485	func (r *CodeReviewer) IsInitialCommit(commit string) bool {
				486	return commit == r.initialCommit
				487	}
				488
				489	// packagesForFiles returns maps of packages related to the given files:
				490	// 1. directPkgs: packages that directly contain the changed files
				491	// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
				492	// It may include false positives.
				493	// Files must be absolute paths!
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	494	func (r CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]packages.Package, err error) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	495	for _, f := range files {
				496	if !filepath.IsAbs(f) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	497	return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	498	}
				499	}
				500	cfg := &packages.Config{
				501	Mode: packages.LoadImports \| packages.NeedEmbedFiles,
				502	Context: ctx,
				503	// Logf: func(msg string, args ...any) {
				504	// slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
				505	// },
				506	// TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
				507	// We can cross that bridge when we get there.
				508	Dir: r.repoRoot,
				509	Tests: true,
				510	}
				511	universe, err := packages.Load(cfg, "./...")
				512	if err != nil {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	513	return nil, err
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	514	}
				515	// Identify packages that directly contain the changed files
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	516	directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	517	for _, pkg := range universe {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	518	pkgFiles := allFiles(pkg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	519	for _, file := range files {
				520	if pkgFiles[file] {
				521	// prefer test packages, as they contain strictly more files (right?)
				522	prev := directPkgs[pkg.PkgPath]
				523	if prev == nil \|\| prev.ForTest == "" {
				524	directPkgs[pkg.PkgPath] = pkg
				525	}
				526	}
				527	}
				528	}
				529
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	530	allPkgs = maps.Clone(directPkgs)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	531
				532	// Add packages that depend on the direct packages
				533	addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	534	return allPkgs, nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	535	}
				536
				537	// allFiles returns all files that might be referenced by the package.
				538	// It may contain false positives.
				539	func allFiles(p *packages.Package) map[string]bool {
				540	files := make(map[string]bool)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	541	// Add files from package info
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	542	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
				543	for _, extra := range add {
				544	for _, file := range extra {
				545	files[file] = true
				546	}
				547	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	548	// Add files from testdata directory
				549	testdataDir := filepath.Join(p.Dir, "testdata")
				550	if _, err := os.Stat(testdataDir); err == nil {
				551	fsys := os.DirFS(p.Dir)
				552	fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
				553	if err == nil && !d.IsDir() {
				554	files[filepath.Join(p.Dir, path)] = true
				555	}
				556	return nil
				557	})
				558	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	559	return files
				560	}
				561
				562	// addDependentPackages adds to pkgs all packages from universe
				563	// that directly or indirectly depend on any package already in pkgs.
				564	func addDependentPackages(universe []packages.Package, pkgs map[string]packages.Package) {
				565	for {
				566	changed := false
				567	for _, p := range universe {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	568	if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
				569	// skip test packages
				570	continue
				571	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	572	if _, ok := pkgs[p.PkgPath]; ok {
				573	// already in pkgs
				574	continue
				575	}
				576	for importPath := range p.Imports {
				577	if _, ok := pkgs[importPath]; ok {
				578	// imports a package dependent on pkgs, add it
				579	pkgs[p.PkgPath] = p
				580	changed = true
				581	break
				582	}
				583	}
				584	}
				585	if !changed {
				586	break
				587	}
				588	}
				589	}
				590
				591	// testJSON is a union of BuildEvent and TestEvent
				592	type testJSON struct {
				593	// TestEvent only:
				594	// The Time field holds the time the event happened. It is conventionally omitted
				595	// for cached test results.
				596	Time time.Time `json:"Time"`
				597	// BuildEvent only:
				598	// The ImportPath field gives the package ID of the package being built.
				599	// This matches the Package.ImportPath field of go list -json and the
				600	// TestEvent.FailedBuild field of go test -json. Note that it does not
				601	// match TestEvent.Package.
				602	ImportPath string `json:"ImportPath"` // BuildEvent only
				603	// TestEvent only:
				604	// The Package field, if present, specifies the package being tested. When the
				605	// go command runs parallel tests in -json mode, events from different tests are
				606	// interlaced; the Package field allows readers to separate them.
				607	Package string `json:"Package"`
				608	// Action is used in both BuildEvent and TestEvent.
				609	// It is the key to distinguishing between them.
				610	// BuildEvent:
				611	// build-output or build-fail
				612	// TestEvent:
				613	// start, run, pause, cont, pass, bench, fail, output, skip
				614	Action string `json:"Action"`
				615	// TestEvent only:
				616	// The Test field, if present, specifies the test, example, or benchmark function
				617	// that caused the event. Events for the overall package test do not set Test.
				618	Test string `json:"Test"`
				619	// TestEvent only:
				620	// The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
				621	// for the specific test or the overall package test that passed or failed.
				622	Elapsed float64
				623	// TestEvent:
				624	// The Output field is set for Action == "output" and is a portion of the
				625	// test's output (standard output and standard error merged together). The
				626	// output is unmodified except that invalid UTF-8 output from a test is coerced
				627	// into valid UTF-8 by use of replacement characters. With that one exception,
				628	// the concatenation of the Output fields of all output events is the exact output
				629	// of the test execution.
				630	// BuildEvent:
				631	// The Output field is set for Action == "build-output" and is a portion of
				632	// the build's output. The concatenation of the Output fields of all output
				633	// events is the exact output of the build. A single event may contain one
				634	// or more lines of output and there may be more than one output event for
				635	// a given ImportPath. This matches the definition of the TestEvent.Output
				636	// field produced by go test -json.
				637	Output string `json:"Output"`
				638	// TestEvent only:
				639	// The FailedBuild field is set for Action == "fail" if the test failure was caused
				640	// by a build failure. It contains the package ID of the package that failed to
				641	// build. This matches the ImportPath field of the "go list" output, as well as the
				642	// BuildEvent.ImportPath field as emitted by "go build -json".
				643	FailedBuild string `json:"FailedBuild"`
				644	}
				645
				646	// parseTestResults converts test output in JSONL format into a slice of testJSON objects
				647	func parseTestResults(testOutput []byte) ([]testJSON, error) {
				648	var results []testJSON
				649	dec := json.NewDecoder(bytes.NewReader(testOutput))
				650	for {
				651	var event testJSON
				652	if err := dec.Decode(&event); err != nil {
				653	if err == io.EOF {
				654	break
				655	}
				656	return nil, err
				657	}
				658	results = append(results, event)
				659	}
				660	return results, nil
				661	}
				662
				663	// testStatus represents the status of a test in a given commit
				664	type testStatus int
				665
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	666	//go:generate go tool stringer -type=testStatus -trimprefix=testStatus
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	667	const (
				668	testStatusUnknown testStatus = iota
				669	testStatusPass
				670	testStatusFail
				671	testStatusBuildFail
				672	testStatusSkip
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	673	testStatusNoTests // no tests exist for this package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	674	)
				675
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	676	// testRegression represents a test that regressed between commits
				677	type testRegression struct {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	678	Package string
				679	Test string // empty for package tests
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	680	BeforeStatus testStatus
				681	AfterStatus testStatus
				682	Output string // failure output in the after state
				683	}
				684
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	685	func (r *testRegression) Source() string {
				686	if r.Test == "" {
				687	return r.Package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	688	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	689	return fmt.Sprintf("%s.%s", r.Package, r.Test)
				690	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	691
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	692	type packageResult struct {
				693	Status testStatus // overall status for the package
				694	TestStatus map[string]testStatus // name -> status
				695	TestOutput map[string][]string // name -> output parts
				696	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	697
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	698	// collectTestStatuses processes a slice of test events and returns rich status information
				699	func collectTestStatuses(results []testJSON) map[string]*packageResult {
				700	m := make(map[string]*packageResult)
				701
				702	for _, event := range results {
				703	pkg := event.Package
				704	p, ok := m[pkg]
				705	if !ok {
				706	p = new(packageResult)
				707	p.TestStatus = make(map[string]testStatus)
				708	p.TestOutput = make(map[string][]string)
				709	m[pkg] = p
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	710	}
				711
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	712	switch event.Action {
				713	case "output":
				714	p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	715	continue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	716	case "pass":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	717	if event.Test == "" {
				718	p.Status = testStatusPass
				719	} else {
				720	p.TestStatus[event.Test] = testStatusPass
				721	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	722	case "fail":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	723	if event.Test == "" {
				724	if event.FailedBuild != "" {
				725	p.Status = testStatusBuildFail
				726	} else {
				727	p.Status = testStatusFail
				728	}
				729	} else {
				730	p.TestStatus[event.Test] = testStatusFail
				731	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	732	case "skip":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	733	if event.Test == "" {
				734	p.Status = testStatusNoTests
				735	} else {
				736	p.TestStatus[event.Test] = testStatusSkip
				737	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	738	}
				739	}
				740
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	741	return m
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	742	}
				743
				744	// compareTestResults identifies tests that have regressed between commits
				745	func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	746	before := collectTestStatuses(beforeResults)
				747	after := collectTestStatuses(afterResults)
				748	var testLevelRegressions []testRegression
				749	var packageLevelRegressions []testRegression
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	750
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	751	afterPkgs := slices.Sorted(maps.Keys(after))
				752	for _, pkg := range afterPkgs {
				753	afterResult := after[pkg]
				754	afterStatus := afterResult.Status
				755	// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
				756	beforeResult, ok := before[pkg]
				757	beforeStatus := testStatusNoTests
				758	if ok {
				759	beforeStatus = beforeResult.Status
				760	}
				761	// If things no longer build, stop at the package level.
				762	// Otherwise, proceed to the test level, so we have more precise information.
				763	if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
				764	packageLevelRegressions = append(packageLevelRegressions, testRegression{
				765	Package: pkg,
				766	BeforeStatus: beforeStatus,
				767	AfterStatus: afterStatus,
				768	})
				769	continue
				770	}
				771	tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
				772	for _, test := range tests {
				773	afterStatus := afterResult.TestStatus[test]
				774	switch afterStatus {
				775	case testStatusPass:
				776	continue
				777	case testStatusUnknown:
				778	slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
				779	continue
				780	}
				781	beforeStatus := beforeResult.TestStatus[test]
				782	if isRegression(beforeStatus, afterStatus) {
				783	testLevelRegressions = append(testLevelRegressions, testRegression{
				784	Package: pkg,
				785	Test: test,
				786	BeforeStatus: beforeStatus,
				787	AfterStatus: afterStatus,
				788	Output: strings.Join(afterResult.TestOutput[test], ""),
				789	})
				790	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	791	}
				792	}
				793
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	794	// If we have test-level regressions, report only those
				795	// Otherwise, report package-level regressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	796	var regressions []testRegression
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	797	if len(testLevelRegressions) > 0 {
				798	regressions = testLevelRegressions
				799	} else {
				800	regressions = packageLevelRegressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	801	}
				802
				803	// Sort regressions for consistent output
				804	slices.SortFunc(regressions, func(a, b testRegression) int {
				805	// First by package
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	806	if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	807	return c
				808	}
				809	// Then by test name
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	810	return strings.Compare(a.Test, b.Test)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	811	})
				812
				813	return regressions, nil
				814	}
				815
				816	// badnessLevels maps test status to a badness level
				817	// Higher values indicate worse status (more severe issues)
				818	var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	819	testStatusBuildFail: 5, // Worst
				820	testStatusFail: 4,
				821	testStatusSkip: 3,
				822	testStatusNoTests: 2,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	823	testStatusPass: 1,
				824	testStatusUnknown: 0, // Least bad - avoids false positives
				825	}
				826
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	827	// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
				828	type regressionKey struct {
				829	before, after testStatus
				830	}
				831
				832	var regressionMessages = map[regressionKey]string{
				833	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
				834	{testStatusUnknown, testStatusFail}: "New test is failing",
				835	{testStatusUnknown, testStatusSkip}: "New test is skipped",
				836	{testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
				837	{testStatusPass, testStatusFail}: "Was passing, now failing",
				838	{testStatusPass, testStatusSkip}: "Was passing, now skipped",
				839	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
				840	{testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
				841	{testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
				842	{testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
				843	{testStatusSkip, testStatusFail}: "Was skipped, now failing",
				844	{testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
				845	}
				846
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	847	// isRegression determines if a test has regressed based on before and after status
				848	// A regression is defined as an increase in badness level
				849	func isRegression(before, after testStatus) bool {
				850	// Higher badness level means worse status
				851	return badnessLevels[after] > badnessLevels[before]
				852	}
				853
				854	// formatTestRegressions generates a human-readable summary of test regressions
				855	func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
				856	if len(regressions) == 0 {
				857	return ""
				858	}
				859
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	860	buf := new(strings.Builder)
				861	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	862
				863	for i, reg := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	864	fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
				865	key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
				866	message, exists := regressionMessages[key]
				867	if !exists {
				868	message = "Regression detected"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	869	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	870	fmt.Fprintf(buf, "%s\n", message)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	871	}
				872
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	873	return buf.String()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	874	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	875
				876	// RelatedFile represents a file historically related to the changed files
				877	type RelatedFile struct {
				878	Path string // Path to the file
				879	Correlation float64 // Correlation score (0.0-1.0)
				880	}
				881
				882	// findRelatedFiles identifies files that are historically related to the changed files
				883	// by analyzing git commit history for co-occurrences.
				884	func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
				885	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
				886	if err != nil {
				887	return nil, fmt.Errorf("failed to get commits touching files: %w", err)
				888	}
				889	if len(commits) == 0 {
				890	return nil, nil
				891	}
				892
				893	relChanged := make(map[string]bool, len(changedFiles))
				894	for _, file := range changedFiles {
				895	rel, err := filepath.Rel(r.repoRoot, file)
				896	if err != nil {
				897	return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
				898	}
				899	relChanged[rel] = true
				900	}
				901
				902	historyFiles := make(map[string]int)
				903	var historyMu sync.Mutex
				904
				905	maxWorkers := runtime.GOMAXPROCS(0)
				906	semaphore := make(chan bool, maxWorkers)
				907	var wg sync.WaitGroup
				908
				909	for _, commit := range commits {
				910	wg.Add(1)
				911	semaphore <- true // acquire
				912
				913	go func(commit string) {
				914	defer wg.Done()
				915	defer func() { <-semaphore }() // release
				916	commitFiles, err := r.getFilesInCommit(ctx, commit)
				917	if err != nil {
				918	slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
				919	return
				920	}
				921	incr := 0
				922	for _, file := range commitFiles {
				923	if relChanged[file] {
				924	incr++
				925	}
				926	}
				927	if incr == 0 {
				928	return
				929	}
				930	historyMu.Lock()
				931	defer historyMu.Unlock()
				932	for _, file := range commitFiles {
				933	historyFiles[file] += incr
				934	}
				935	}(commit)
				936	}
				937	wg.Wait()
				938
				939	// normalize
				940	maxCount := 0
				941	for _, count := range historyFiles {
				942	maxCount = max(maxCount, count)
				943	}
				944	if maxCount == 0 {
				945	return nil, nil
				946	}
				947
				948	var relatedFiles []RelatedFile
				949	for file, count := range historyFiles {
				950	if relChanged[file] {
				951	// Don't include inputs in the output.
				952	continue
				953	}
				954	correlation := float64(count) / float64(maxCount)
				955	// Require min correlation to avoid noise
				956	if correlation >= 0.1 {
				957	relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
				958	}
				959	}
				960
				961	// Highest correlation first
				962	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
				963	return -1 * cmp.Compare(a.Correlation, b.Correlation)
				964	})
				965
				966	// Limit to 1 correlated file per input file.
				967	// (Arbitrary limit, to be adjusted.)
				968	maxFiles := len(changedFiles)
				969	if len(relatedFiles) > maxFiles {
				970	relatedFiles = relatedFiles[:maxFiles]
				971	}
				972
				973	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
				974	// and then increase the strength of the wording in the relatedFiles message.
				975
				976	return relatedFiles, nil
				977	}
				978
				979	// getCommitsTouchingFiles returns all commits that touch any of the specified files
				980	func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
				981	if len(files) == 0 {
				982	return nil, nil
				983	}
Josh Bleecher Snyder	d4b1cc7	2025-04-29 13:49:18 +0000	[diff] [blame]	984	fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	985	cmd := exec.CommandContext(ctx, "git", fileArgs...)
				986	cmd.Dir = r.repoRoot
				987	out, err := cmd.CombinedOutput()
				988	if err != nil {
				989	return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
				990	}
				991	return nonEmptyTrimmedLines(out), nil
				992	}
				993
				994	// getFilesInCommit returns all files changed in a specific commit
				995	func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
				996	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
				997	cmd.Dir = r.repoRoot
				998	out, err := cmd.CombinedOutput()
				999	if err != nil {
				1000	return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
				1001	}
				1002	return nonEmptyTrimmedLines(out), nil
				1003	}
				1004
				1005	func nonEmptyTrimmedLines(b []byte) []string {
				1006	var lines []string
				1007	for line := range strings.Lines(string(b)) {
				1008	line = strings.TrimSpace(line)
				1009	if line != "" {
				1010	lines = append(lines, line)
				1011	}
				1012	}
				1013	return lines
				1014	}
				1015
				1016	// formatRelatedFiles formats the related files list into a human-readable message
				1017	func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
				1018	if len(files) == 0 {
				1019	return ""
				1020	}
				1021
				1022	buf := new(strings.Builder)
				1023
				1024	fmt.Fprintf(buf, "Potentially related files:\n\n")
				1025
				1026	for _, file := range files {
				1027	fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
				1028	}
				1029
				1030	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
				1031	return buf.String()
				1032	}