Blame - claudetool/differential.go - sketch

blob: 1d7ff6e7978fcc27ca294f2a02945f88e470e57b [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package claudetool
				2
				3	import (
				4	"bytes"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	5	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"context"
				7	"encoding/json"
				8	"fmt"
				9	"io"
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	10	"io/fs"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	11	"log/slog"
				12	"maps"
				13	"os"
				14	"os/exec"
				15	"path/filepath"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	16	"runtime"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	17	"slices"
				18	"strings"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	19	"sync"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	20	"time"
				21
				22	"golang.org/x/tools/go/packages"
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	23	"sketch.dev/llm"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	24	)
				25
				26	// This file does differential quality analysis of a commit relative to a base commit.
				27
				28	// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	29	func (r CodeReviewer) Tool() llm.Tool {
				30	spec := &llm.Tool{
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	31	Name: "codereview",
				32	Description: `Run an automated code review.`,
				33	// If you modify this, update the termui template for prettier rendering.
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	34	InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	35	Run: r.Run,
				36	}
				37	return spec
				38	}
				39
				40	func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	41	// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
				42	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	43	if err := r.RequireNormalGitState(ctx); err != nil {
				44	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
				45	return "", err
				46	}
				47	if err := r.RequireNoUncommittedChanges(ctx); err != nil {
				48	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
				49	return "", err
				50	}
				51
				52	// Check that the current commit is not the initial commit
				53	currentCommit, err := r.CurrentCommit(ctx)
				54	if err != nil {
				55	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
				56	return "", err
				57	}
				58	if r.IsInitialCommit(currentCommit) {
				59	slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
				60	return "", fmt.Errorf("no new commits have been added, nothing to review")
				61	}
				62
				63	// No matter what failures happen from here out, we will declare this to have been reviewed.
				64	// This should help avoid the model getting blocked by a broken code review tool.
				65	r.reviewed = append(r.reviewed, currentCommit)
				66
				67	changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
				68	if err != nil {
				69	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
				70	return "", err
				71	}
				72
				73	// Prepare to analyze before/after for the impacted files.
				74	// We use the current commit to determine what packages exist and are impacted.
				75	// The packages in the initial commit may be different.
				76	// Good enough for now.
				77	// TODO: do better
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	78	allPkgs, err := r.packagesForFiles(ctx, changedFiles)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	79	if err != nil {
				80	// TODO: log and skip to stuff that doesn't require packages
				81	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
				82	return "", err
				83	}
				84	allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	85
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	86	var errorMessages []string // problems we want the model to address
				87	var infoMessages []string // info the model should consider
				88
				89	// Find potentially related files that should also be considered
				90	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
				91	relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
				92	if err != nil {
				93	slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
				94	} else {
				95	relatedMsg := r.formatRelatedFiles(relatedFiles)
				96	if relatedMsg != "" {
				97	infoMessages = append(infoMessages, relatedMsg)
				98	}
				99	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	100
				101	testMsg, err := r.checkTests(ctx, allPkgList)
				102	if err != nil {
				103	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
				104	return "", err
				105	}
				106	if testMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	107	errorMessages = append(errorMessages, testMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	108	}
				109
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	110	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	111	if err != nil {
				112	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
				113	return "", err
				114	}
				115	if goplsMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	116	errorMessages = append(errorMessages, goplsMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	117	}
				118
Josh Bleecher Snyder	e2518e5	2025-04-29 11:13:40 -0700	[diff] [blame]	119	if r.llmReview {
				120	llmComments, err := r.doLLMReview(ctx)
				121	if err != nil {
				122	// Log the error but don't fail the codereview if this check fails
				123	slog.WarnContext(ctx, "CodeReviewer.Run: error doing LLM review", "err", err)
				124	}
				125	if llmComments != "" {
				126	infoMessages = append(infoMessages, llmComments)
				127	}
				128	}
				129
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	130	// NOTE: If you change this output format, update the corresponding UI parsing in:
				131	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	132	buf := new(strings.Builder)
				133	if len(infoMessages) > 0 {
				134	buf.WriteString("# Info\n\n")
				135	buf.WriteString(strings.Join(infoMessages, "\n\n"))
				136	buf.WriteString("\n\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	137	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	138	if len(errorMessages) > 0 {
				139	buf.WriteString("# Errors\n\n")
				140	buf.WriteString(strings.Join(errorMessages, "\n\n"))
				141	buf.WriteString("\n\nPlease fix before proceeding.\n")
				142	}
				143	if buf.Len() == 0 {
				144	buf.WriteString("OK")
				145	}
				146	return buf.String(), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	147	}
				148
				149	func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
				150	if r.initialWorktree != "" {
				151	return nil
				152	}
				153	tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
				154	if err != nil {
				155	return err
				156	}
				157	worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.initialCommit)
				158	worktreeCmd.Dir = r.repoRoot
				159	out, err := worktreeCmd.CombinedOutput()
				160	if err != nil {
				161	return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
				162	}
				163	r.initialWorktree = tmpDir
				164	return nil
				165	}
				166
				167	func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
				168	goTestArgs := []string{"test", "-json", "-v"}
				169	goTestArgs = append(goTestArgs, pkgList...)
				170
				171	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				172	afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	173	afterTestOut, _ := afterTestCmd.Output()
				174	// unfortunately, we can't short-circuit here even if all tests pass,
				175	// because we need to check for skipped tests.
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	176
				177	err := r.initializeInitialCommitWorktree(ctx)
				178	if err != nil {
				179	return "", err
				180	}
				181
				182	beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				183	beforeTestCmd.Dir = r.initialWorktree
				184	beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
				185
				186	// Parse the jsonl test results
				187	beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
				188	if beforeParseErr != nil {
				189	return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
				190	}
				191	afterResults, afterParseErr := parseTestResults(afterTestOut)
				192	if afterParseErr != nil {
				193	return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
				194	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	195	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
				196	if err != nil {
				197	return "", fmt.Errorf("failed to compare test results: %w", err)
				198	}
				199	// TODO: better output formatting?
				200	res := r.formatTestRegressions(testRegressions)
				201	return res, nil
				202	}
				203
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	204	// GoplsIssue represents a single issue reported by gopls check
				205	type GoplsIssue struct {
				206	Position string // File position in format "file:line:col-range"
				207	Message string // Description of the issue
				208	}
				209
				210	// checkGopls runs gopls check on the provided files in both the current and initial state,
				211	// compares the results, and reports any new issues introduced in the current state.
				212	func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
				213	if len(changedFiles) == 0 {
				214	return "", nil // no files to check
				215	}
				216
				217	// Filter out non-Go files as gopls only works on Go files
				218	// and verify they still exist (not deleted)
				219	var goFiles []string
				220	for _, file := range changedFiles {
				221	if !strings.HasSuffix(file, ".go") {
				222	continue // not a Go file
				223	}
				224
				225	// Check if the file still exists (not deleted)
				226	if _, err := os.Stat(file); os.IsNotExist(err) {
				227	continue // file doesn't exist anymore (deleted)
				228	}
				229
				230	goFiles = append(goFiles, file)
				231	}
				232
				233	if len(goFiles) == 0 {
				234	return "", nil // no Go files to check
				235	}
				236
				237	// Run gopls check on the current state
				238	goplsArgs := append([]string{"check"}, goFiles...)
				239
				240	afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
				241	afterGoplsCmd.Dir = r.repoRoot
				242	afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
				243	if err != nil {
				244	// Check if the output looks like real gopls issues or if it's just error output
				245	if !looksLikeGoplsIssues(afterGoplsOut) {
				246	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
				247	return "", nil // Skip rather than failing the entire code review
				248	}
				249	// Otherwise, proceed with parsing - it's likely just the non-zero exit code due to found issues
				250	}
				251
				252	// Parse the output
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	253	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	254
				255	// If no issues were found, we're done
				256	if len(afterIssues) == 0 {
				257	return "", nil
				258	}
				259
				260	// Gopls detected issues in the current state, check if they existed in the initial state
				261	initErr := r.initializeInitialCommitWorktree(ctx)
				262	if initErr != nil {
				263	return "", err
				264	}
				265
				266	// For each file that exists in the initial commit, run gopls check
				267	var initialFilesToCheck []string
				268	for _, file := range goFiles {
				269	// Get relative path for git operations
				270	relFile, err := filepath.Rel(r.repoRoot, file)
				271	if err != nil {
				272	slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
				273	continue
				274	}
				275
				276	// Check if the file exists in the initial commit
				277	checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.initialCommit, relFile))
				278	checkCmd.Dir = r.repoRoot
				279	if err := checkCmd.Run(); err == nil {
				280	// File exists in initial commit
				281	initialFilePath := filepath.Join(r.initialWorktree, relFile)
				282	initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
				283	}
				284	}
				285
				286	// Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	287	var beforeIssues []GoplsIssue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	288	if len(initialFilesToCheck) > 0 {
				289	beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
				290	beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
				291	beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	292	beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	293	if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
				294	// If gopls fails to run properly on the initial commit, log a warning and continue
				295	// with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	296	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	297	} else {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	298	beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	299	}
				300	}
				301
				302	// Find new issues that weren't present in the initial state
				303	goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
				304	if len(goplsRegressions) == 0 {
				305	return "", nil // no new issues
				306	}
				307
				308	// Format the results
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	309	return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	310	}
				311
				312	// parseGoplsOutput parses the text output from gopls check
				313	// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	314	func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	315	var issues []GoplsIssue
				316	lines := strings.Split(string(output), "\n")
				317
				318	for _, line := range lines {
				319	line = strings.TrimSpace(line)
				320	if line == "" {
				321	continue
				322	}
				323
				324	// Skip lines that look like error messages rather than gopls issues
				325	if strings.HasPrefix(line, "Error:") \|\|
				326	strings.HasPrefix(line, "Failed:") \|\|
				327	strings.HasPrefix(line, "Warning:") \|\|
				328	strings.HasPrefix(line, "gopls:") {
				329	continue
				330	}
				331
				332	// Find the first colon that separates the file path from the line number
				333	firstColonIdx := strings.Index(line, ":")
				334	if firstColonIdx < 0 {
				335	continue // Invalid format
				336	}
				337
				338	// Verify the part before the first colon looks like a file path
				339	potentialPath := line[:firstColonIdx]
				340	if !strings.HasSuffix(potentialPath, ".go") {
				341	continue // Not a Go file path
				342	}
				343
				344	// Find the position of the first message separator ': '
				345	// This separates the position info from the message
				346	messageStart := strings.Index(line, ": ")
				347	if messageStart < 0 \|\| messageStart <= firstColonIdx {
				348	continue // Invalid format
				349	}
				350
				351	// Extract position and message
				352	position := line[:messageStart]
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	353	rel, err := filepath.Rel(root, position)
				354	if err == nil {
				355	position = rel
				356	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	357	message := line[messageStart+2:] // Skip the ': ' separator
				358
				359	// Verify position has the expected format (at least 2 colons for line:col)
				360	colonCount := strings.Count(position, ":")
				361	if colonCount < 2 {
				362	continue // Not enough position information
				363	}
				364
				365	issues = append(issues, GoplsIssue{
				366	Position: position,
				367	Message: message,
				368	})
				369	}
				370
				371	return issues
				372	}
				373
				374	// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
				375	// rather than error messages about gopls itself failing
				376	func looksLikeGoplsIssues(output []byte) bool {
				377	// If output is empty, it's not valid issues
				378	if len(output) == 0 {
				379	return false
				380	}
				381
				382	// Check if output has at least one line that looks like a gopls issue
				383	// A gopls issue looks like: '/path/to/file.go:123:45-67: message'
				384	lines := strings.Split(string(output), "\n")
				385	for _, line := range lines {
				386	line = strings.TrimSpace(line)
				387	if line == "" {
				388	continue
				389	}
				390
				391	// A gopls issue has at least two colons (file path, line number, column)
				392	// and contains a colon followed by a space (separating position from message)
				393	colonCount := strings.Count(line, ":")
				394	hasSeparator := strings.Contains(line, ": ")
				395
				396	if colonCount >= 2 && hasSeparator {
				397	// Check if it starts with a likely file path (ending in .go)
				398	parts := strings.SplitN(line, ":", 2)
				399	if strings.HasSuffix(parts[0], ".go") {
				400	return true
				401	}
				402	}
				403	}
				404	return false
				405	}
				406
				407	// normalizeGoplsPosition extracts just the file path from a position string
				408	func normalizeGoplsPosition(position string) string {
				409	// Extract just the file path by taking everything before the first colon
				410	parts := strings.Split(position, ":")
				411	if len(parts) < 1 {
				412	return position
				413	}
				414	return parts[0]
				415	}
				416
				417	// findGoplsRegressions identifies gopls issues that are new in the after state
				418	func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
				419	var regressions []GoplsIssue
				420
				421	// Build map of before issues for easier lookup
				422	beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
				423	for _, issue := range before {
				424	file := normalizeGoplsPosition(issue.Position)
				425	if _, exists := beforeIssueMap[file]; !exists {
				426	beforeIssueMap[file] = make(map[string]bool)
				427	}
				428	// Store both the exact message and the general issue type for fuzzy matching
				429	beforeIssueMap[file][issue.Message] = true
				430
				431	// Extract the general issue type (everything before the first ':' in the message)
				432	generalIssue := issue.Message
				433	if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
				434	generalIssue = issue.Message[:colonIdx]
				435	}
				436	beforeIssueMap[file][generalIssue] = true
				437	}
				438
				439	// Check each after issue to see if it's new
				440	for _, afterIssue := range after {
				441	file := normalizeGoplsPosition(afterIssue.Position)
				442	isNew := true
				443
				444	if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
				445	// Check for exact message match
				446	if fileIssues[afterIssue.Message] {
				447	isNew = false
				448	} else {
				449	// Check for general issue type match
				450	generalIssue := afterIssue.Message
				451	if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
				452	generalIssue = afterIssue.Message[:colonIdx]
				453	}
				454	if fileIssues[generalIssue] {
				455	isNew = false
				456	}
				457	}
				458	}
				459
				460	if isNew {
				461	regressions = append(regressions, afterIssue)
				462	}
				463	}
				464
				465	// Sort regressions for deterministic output
				466	slices.SortFunc(regressions, func(a, b GoplsIssue) int {
				467	return strings.Compare(a.Position, b.Position)
				468	})
				469
				470	return regressions
				471	}
				472
				473	// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	474	func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	475	if len(regressions) == 0 {
				476	return ""
				477	}
				478
				479	var sb strings.Builder
				480	sb.WriteString("Gopls check issues detected:\n\n")
				481
				482	// Format each issue
				483	for i, issue := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	484	sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	485	}
				486
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	487	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
				488	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	489	return sb.String()
				490	}
				491
				492	func (r *CodeReviewer) HasReviewed(commit string) bool {
				493	return slices.Contains(r.reviewed, commit)
				494	}
				495
				496	func (r *CodeReviewer) IsInitialCommit(commit string) bool {
				497	return commit == r.initialCommit
				498	}
				499
				500	// packagesForFiles returns maps of packages related to the given files:
				501	// 1. directPkgs: packages that directly contain the changed files
				502	// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
				503	// It may include false positives.
				504	// Files must be absolute paths!
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	505	func (r CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]packages.Package, err error) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	506	for _, f := range files {
				507	if !filepath.IsAbs(f) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	508	return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	509	}
				510	}
				511	cfg := &packages.Config{
				512	Mode: packages.LoadImports \| packages.NeedEmbedFiles,
				513	Context: ctx,
				514	// Logf: func(msg string, args ...any) {
				515	// slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
				516	// },
				517	// TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
				518	// We can cross that bridge when we get there.
				519	Dir: r.repoRoot,
				520	Tests: true,
				521	}
				522	universe, err := packages.Load(cfg, "./...")
				523	if err != nil {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	524	return nil, err
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	525	}
				526	// Identify packages that directly contain the changed files
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	527	directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	528	for _, pkg := range universe {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	529	pkgFiles := allFiles(pkg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	530	for _, file := range files {
				531	if pkgFiles[file] {
				532	// prefer test packages, as they contain strictly more files (right?)
				533	prev := directPkgs[pkg.PkgPath]
				534	if prev == nil \|\| prev.ForTest == "" {
				535	directPkgs[pkg.PkgPath] = pkg
				536	}
				537	}
				538	}
				539	}
				540
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	541	allPkgs = maps.Clone(directPkgs)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	542
				543	// Add packages that depend on the direct packages
				544	addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	545	return allPkgs, nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	546	}
				547
				548	// allFiles returns all files that might be referenced by the package.
				549	// It may contain false positives.
				550	func allFiles(p *packages.Package) map[string]bool {
				551	files := make(map[string]bool)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	552	// Add files from package info
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	553	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
				554	for _, extra := range add {
				555	for _, file := range extra {
				556	files[file] = true
				557	}
				558	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	559	// Add files from testdata directory
				560	testdataDir := filepath.Join(p.Dir, "testdata")
				561	if _, err := os.Stat(testdataDir); err == nil {
				562	fsys := os.DirFS(p.Dir)
				563	fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
				564	if err == nil && !d.IsDir() {
				565	files[filepath.Join(p.Dir, path)] = true
				566	}
				567	return nil
				568	})
				569	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	570	return files
				571	}
				572
				573	// addDependentPackages adds to pkgs all packages from universe
				574	// that directly or indirectly depend on any package already in pkgs.
				575	func addDependentPackages(universe []packages.Package, pkgs map[string]packages.Package) {
				576	for {
				577	changed := false
				578	for _, p := range universe {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	579	if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
				580	// skip test packages
				581	continue
				582	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	583	if _, ok := pkgs[p.PkgPath]; ok {
				584	// already in pkgs
				585	continue
				586	}
				587	for importPath := range p.Imports {
				588	if _, ok := pkgs[importPath]; ok {
				589	// imports a package dependent on pkgs, add it
				590	pkgs[p.PkgPath] = p
				591	changed = true
				592	break
				593	}
				594	}
				595	}
				596	if !changed {
				597	break
				598	}
				599	}
				600	}
				601
				602	// testJSON is a union of BuildEvent and TestEvent
				603	type testJSON struct {
				604	// TestEvent only:
				605	// The Time field holds the time the event happened. It is conventionally omitted
				606	// for cached test results.
				607	Time time.Time `json:"Time"`
				608	// BuildEvent only:
				609	// The ImportPath field gives the package ID of the package being built.
				610	// This matches the Package.ImportPath field of go list -json and the
				611	// TestEvent.FailedBuild field of go test -json. Note that it does not
				612	// match TestEvent.Package.
				613	ImportPath string `json:"ImportPath"` // BuildEvent only
				614	// TestEvent only:
				615	// The Package field, if present, specifies the package being tested. When the
				616	// go command runs parallel tests in -json mode, events from different tests are
				617	// interlaced; the Package field allows readers to separate them.
				618	Package string `json:"Package"`
				619	// Action is used in both BuildEvent and TestEvent.
				620	// It is the key to distinguishing between them.
				621	// BuildEvent:
				622	// build-output or build-fail
				623	// TestEvent:
				624	// start, run, pause, cont, pass, bench, fail, output, skip
				625	Action string `json:"Action"`
				626	// TestEvent only:
				627	// The Test field, if present, specifies the test, example, or benchmark function
				628	// that caused the event. Events for the overall package test do not set Test.
				629	Test string `json:"Test"`
				630	// TestEvent only:
				631	// The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
				632	// for the specific test or the overall package test that passed or failed.
				633	Elapsed float64
				634	// TestEvent:
				635	// The Output field is set for Action == "output" and is a portion of the
				636	// test's output (standard output and standard error merged together). The
				637	// output is unmodified except that invalid UTF-8 output from a test is coerced
				638	// into valid UTF-8 by use of replacement characters. With that one exception,
				639	// the concatenation of the Output fields of all output events is the exact output
				640	// of the test execution.
				641	// BuildEvent:
				642	// The Output field is set for Action == "build-output" and is a portion of
				643	// the build's output. The concatenation of the Output fields of all output
				644	// events is the exact output of the build. A single event may contain one
				645	// or more lines of output and there may be more than one output event for
				646	// a given ImportPath. This matches the definition of the TestEvent.Output
				647	// field produced by go test -json.
				648	Output string `json:"Output"`
				649	// TestEvent only:
				650	// The FailedBuild field is set for Action == "fail" if the test failure was caused
				651	// by a build failure. It contains the package ID of the package that failed to
				652	// build. This matches the ImportPath field of the "go list" output, as well as the
				653	// BuildEvent.ImportPath field as emitted by "go build -json".
				654	FailedBuild string `json:"FailedBuild"`
				655	}
				656
				657	// parseTestResults converts test output in JSONL format into a slice of testJSON objects
				658	func parseTestResults(testOutput []byte) ([]testJSON, error) {
				659	var results []testJSON
				660	dec := json.NewDecoder(bytes.NewReader(testOutput))
				661	for {
				662	var event testJSON
				663	if err := dec.Decode(&event); err != nil {
				664	if err == io.EOF {
				665	break
				666	}
				667	return nil, err
				668	}
				669	results = append(results, event)
				670	}
				671	return results, nil
				672	}
				673
				674	// testStatus represents the status of a test in a given commit
				675	type testStatus int
				676
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	677	//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	678	const (
				679	testStatusUnknown testStatus = iota
				680	testStatusPass
				681	testStatusFail
				682	testStatusBuildFail
				683	testStatusSkip
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	684	testStatusNoTests // no tests exist for this package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	685	)
				686
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	687	// testRegression represents a test that regressed between commits
				688	type testRegression struct {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	689	Package string
				690	Test string // empty for package tests
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	691	BeforeStatus testStatus
				692	AfterStatus testStatus
				693	Output string // failure output in the after state
				694	}
				695
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	696	func (r *testRegression) Source() string {
				697	if r.Test == "" {
				698	return r.Package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	699	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	700	return fmt.Sprintf("%s.%s", r.Package, r.Test)
				701	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	702
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	703	type packageResult struct {
				704	Status testStatus // overall status for the package
				705	TestStatus map[string]testStatus // name -> status
				706	TestOutput map[string][]string // name -> output parts
				707	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	708
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	709	// collectTestStatuses processes a slice of test events and returns rich status information
				710	func collectTestStatuses(results []testJSON) map[string]*packageResult {
				711	m := make(map[string]*packageResult)
				712
				713	for _, event := range results {
				714	pkg := event.Package
				715	p, ok := m[pkg]
				716	if !ok {
				717	p = new(packageResult)
				718	p.TestStatus = make(map[string]testStatus)
				719	p.TestOutput = make(map[string][]string)
				720	m[pkg] = p
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	721	}
				722
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	723	switch event.Action {
				724	case "output":
				725	p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	726	continue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	727	case "pass":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	728	if event.Test == "" {
				729	p.Status = testStatusPass
				730	} else {
				731	p.TestStatus[event.Test] = testStatusPass
				732	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	733	case "fail":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	734	if event.Test == "" {
				735	if event.FailedBuild != "" {
				736	p.Status = testStatusBuildFail
				737	} else {
				738	p.Status = testStatusFail
				739	}
				740	} else {
				741	p.TestStatus[event.Test] = testStatusFail
				742	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	743	case "skip":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	744	if event.Test == "" {
				745	p.Status = testStatusNoTests
				746	} else {
				747	p.TestStatus[event.Test] = testStatusSkip
				748	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	749	}
				750	}
				751
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	752	return m
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	753	}
				754
				755	// compareTestResults identifies tests that have regressed between commits
				756	func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	757	before := collectTestStatuses(beforeResults)
				758	after := collectTestStatuses(afterResults)
				759	var testLevelRegressions []testRegression
				760	var packageLevelRegressions []testRegression
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	761
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	762	afterPkgs := slices.Sorted(maps.Keys(after))
				763	for _, pkg := range afterPkgs {
				764	afterResult := after[pkg]
				765	afterStatus := afterResult.Status
				766	// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
				767	beforeResult, ok := before[pkg]
				768	beforeStatus := testStatusNoTests
				769	if ok {
				770	beforeStatus = beforeResult.Status
				771	}
				772	// If things no longer build, stop at the package level.
				773	// Otherwise, proceed to the test level, so we have more precise information.
				774	if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
				775	packageLevelRegressions = append(packageLevelRegressions, testRegression{
				776	Package: pkg,
				777	BeforeStatus: beforeStatus,
				778	AfterStatus: afterStatus,
				779	})
				780	continue
				781	}
				782	tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
				783	for _, test := range tests {
				784	afterStatus := afterResult.TestStatus[test]
				785	switch afterStatus {
				786	case testStatusPass:
				787	continue
				788	case testStatusUnknown:
				789	slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
				790	continue
				791	}
				792	beforeStatus := beforeResult.TestStatus[test]
				793	if isRegression(beforeStatus, afterStatus) {
				794	testLevelRegressions = append(testLevelRegressions, testRegression{
				795	Package: pkg,
				796	Test: test,
				797	BeforeStatus: beforeStatus,
				798	AfterStatus: afterStatus,
				799	Output: strings.Join(afterResult.TestOutput[test], ""),
				800	})
				801	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	802	}
				803	}
				804
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	805	// If we have test-level regressions, report only those
				806	// Otherwise, report package-level regressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	807	var regressions []testRegression
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	808	if len(testLevelRegressions) > 0 {
				809	regressions = testLevelRegressions
				810	} else {
				811	regressions = packageLevelRegressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	812	}
				813
				814	// Sort regressions for consistent output
				815	slices.SortFunc(regressions, func(a, b testRegression) int {
				816	// First by package
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	817	if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	818	return c
				819	}
				820	// Then by test name
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	821	return strings.Compare(a.Test, b.Test)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	822	})
				823
				824	return regressions, nil
				825	}
				826
				827	// badnessLevels maps test status to a badness level
				828	// Higher values indicate worse status (more severe issues)
				829	var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	830	testStatusBuildFail: 5, // Worst
				831	testStatusFail: 4,
				832	testStatusSkip: 3,
				833	testStatusNoTests: 2,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	834	testStatusPass: 1,
				835	testStatusUnknown: 0, // Least bad - avoids false positives
				836	}
				837
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	838	// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
				839	type regressionKey struct {
				840	before, after testStatus
				841	}
				842
				843	var regressionMessages = map[regressionKey]string{
				844	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
				845	{testStatusUnknown, testStatusFail}: "New test is failing",
				846	{testStatusUnknown, testStatusSkip}: "New test is skipped",
				847	{testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
				848	{testStatusPass, testStatusFail}: "Was passing, now failing",
				849	{testStatusPass, testStatusSkip}: "Was passing, now skipped",
				850	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
				851	{testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
				852	{testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
				853	{testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
				854	{testStatusSkip, testStatusFail}: "Was skipped, now failing",
				855	{testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
				856	}
				857
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	858	// isRegression determines if a test has regressed based on before and after status
				859	// A regression is defined as an increase in badness level
				860	func isRegression(before, after testStatus) bool {
				861	// Higher badness level means worse status
				862	return badnessLevels[after] > badnessLevels[before]
				863	}
				864
				865	// formatTestRegressions generates a human-readable summary of test regressions
				866	func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
				867	if len(regressions) == 0 {
				868	return ""
				869	}
				870
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	871	buf := new(strings.Builder)
				872	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.initialCommit)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	873
				874	for i, reg := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	875	fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
				876	key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
				877	message, exists := regressionMessages[key]
				878	if !exists {
				879	message = "Regression detected"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	880	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	881	fmt.Fprintf(buf, "%s\n", message)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	882	}
				883
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	884	return buf.String()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	885	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	886
				887	// RelatedFile represents a file historically related to the changed files
				888	type RelatedFile struct {
				889	Path string // Path to the file
				890	Correlation float64 // Correlation score (0.0-1.0)
				891	}
				892
				893	// findRelatedFiles identifies files that are historically related to the changed files
				894	// by analyzing git commit history for co-occurrences.
				895	func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
				896	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
				897	if err != nil {
				898	return nil, fmt.Errorf("failed to get commits touching files: %w", err)
				899	}
				900	if len(commits) == 0 {
				901	return nil, nil
				902	}
				903
				904	relChanged := make(map[string]bool, len(changedFiles))
				905	for _, file := range changedFiles {
				906	rel, err := filepath.Rel(r.repoRoot, file)
				907	if err != nil {
				908	return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
				909	}
				910	relChanged[rel] = true
				911	}
				912
				913	historyFiles := make(map[string]int)
				914	var historyMu sync.Mutex
				915
				916	maxWorkers := runtime.GOMAXPROCS(0)
				917	semaphore := make(chan bool, maxWorkers)
				918	var wg sync.WaitGroup
				919
				920	for _, commit := range commits {
				921	wg.Add(1)
				922	semaphore <- true // acquire
				923
				924	go func(commit string) {
				925	defer wg.Done()
				926	defer func() { <-semaphore }() // release
				927	commitFiles, err := r.getFilesInCommit(ctx, commit)
				928	if err != nil {
				929	slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
				930	return
				931	}
				932	incr := 0
				933	for _, file := range commitFiles {
				934	if relChanged[file] {
				935	incr++
				936	}
				937	}
				938	if incr == 0 {
				939	return
				940	}
				941	historyMu.Lock()
				942	defer historyMu.Unlock()
				943	for _, file := range commitFiles {
				944	historyFiles[file] += incr
				945	}
				946	}(commit)
				947	}
				948	wg.Wait()
				949
				950	// normalize
				951	maxCount := 0
				952	for _, count := range historyFiles {
				953	maxCount = max(maxCount, count)
				954	}
				955	if maxCount == 0 {
				956	return nil, nil
				957	}
				958
				959	var relatedFiles []RelatedFile
				960	for file, count := range historyFiles {
				961	if relChanged[file] {
				962	// Don't include inputs in the output.
				963	continue
				964	}
				965	correlation := float64(count) / float64(maxCount)
				966	// Require min correlation to avoid noise
				967	if correlation >= 0.1 {
				968	relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
				969	}
				970	}
				971
				972	// Highest correlation first
				973	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
				974	return -1 * cmp.Compare(a.Correlation, b.Correlation)
				975	})
				976
				977	// Limit to 1 correlated file per input file.
				978	// (Arbitrary limit, to be adjusted.)
				979	maxFiles := len(changedFiles)
				980	if len(relatedFiles) > maxFiles {
				981	relatedFiles = relatedFiles[:maxFiles]
				982	}
				983
				984	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
				985	// and then increase the strength of the wording in the relatedFiles message.
				986
				987	return relatedFiles, nil
				988	}
				989
				990	// getCommitsTouchingFiles returns all commits that touch any of the specified files
				991	func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
				992	if len(files) == 0 {
				993	return nil, nil
				994	}
Josh Bleecher Snyder	d4b1cc7	2025-04-29 13:49:18 +0000	[diff] [blame]	995	fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	996	cmd := exec.CommandContext(ctx, "git", fileArgs...)
				997	cmd.Dir = r.repoRoot
				998	out, err := cmd.CombinedOutput()
				999	if err != nil {
				1000	return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
				1001	}
				1002	return nonEmptyTrimmedLines(out), nil
				1003	}
				1004
				1005	// getFilesInCommit returns all files changed in a specific commit
				1006	func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
				1007	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
				1008	cmd.Dir = r.repoRoot
				1009	out, err := cmd.CombinedOutput()
				1010	if err != nil {
				1011	return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
				1012	}
				1013	return nonEmptyTrimmedLines(out), nil
				1014	}
				1015
				1016	func nonEmptyTrimmedLines(b []byte) []string {
				1017	var lines []string
				1018	for line := range strings.Lines(string(b)) {
				1019	line = strings.TrimSpace(line)
				1020	if line != "" {
				1021	lines = append(lines, line)
				1022	}
				1023	}
				1024	return lines
				1025	}
				1026
				1027	// formatRelatedFiles formats the related files list into a human-readable message
				1028	func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
				1029	if len(files) == 0 {
				1030	return ""
				1031	}
				1032
				1033	buf := new(strings.Builder)
				1034
				1035	fmt.Fprintf(buf, "Potentially related files:\n\n")
				1036
				1037	for _, file := range files {
				1038	fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
				1039	}
				1040
				1041	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
				1042	return buf.String()
				1043	}