Blame - claudetool/codereview/differential.go - sketch

blob: 6c29d064234515e95b89d5b716a2b3b973129049 [file] [log] [blame]

Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame]	1	package codereview
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	2
				3	import (
				4	"bytes"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	5	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"context"
Josh Bleecher Snyder	26b6f9b	2025-07-01 01:41:11 +0000	[diff] [blame]	7	"crypto/sha256"
				8	"encoding/hex"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	9	"encoding/json"
				10	"fmt"
				11	"io"
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	12	"io/fs"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	13	"log/slog"
				14	"maps"
				15	"os"
				16	"os/exec"
				17	"path/filepath"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	18	"runtime"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	19	"slices"
				20	"strings"
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	21	"sync"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	22	"time"
				23
				24	"golang.org/x/tools/go/packages"
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	25	"sketch.dev/llm"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	26	)
				27
				28	// This file does differential quality analysis of a commit relative to a base commit.
				29
				30	// Tool returns a tool spec for a CodeReview tool backed by r.
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	31	func (r CodeReviewer) Tool() llm.Tool {
				32	spec := &llm.Tool{
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	33	Name: "codereview",
Josh Bleecher Snyder	272c4a9	2025-06-04 16:08:42 -0700	[diff] [blame]	34	Description: `Run an automated code review before presenting git commits to the user. Call if/when you've completed your current work and are ready for user feedback.`,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	35	// If you modify this, update the termui template for prettier rendering.
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	36	InputSchema: json.RawMessage(`{
				37	"type": "object",
				38	"properties": {
				39	"timeout": {
				40	"type": "string",
				41	"description": "Timeout as a Go duration string (default: 1m)",
				42	"default": "1m"
				43	}
				44	}
				45	}`),
				46	Run: r.Run,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	47	}
				48	return spec
				49	}
				50
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	51	func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) llm.ToolOut {
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	52	// Parse input to get timeout
				53	var input struct {
				54	Timeout string `json:"timeout"`
				55	}
				56	if len(m) > 0 {
				57	if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	58	return llm.ErrorfToolOut("failed to parse input: %w", err)
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	59	}
				60	}
				61	if input.Timeout == "" {
				62	input.Timeout = "1m" // default timeout
				63	}
				64
				65	// Parse timeout duration
				66	timeout, err := time.ParseDuration(input.Timeout)
				67	if err != nil {
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	68	return llm.ErrorfToolOut("invalid timeout duration %q: %w", input.Timeout, err)
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	69	}
				70
				71	// Create timeout context
				72	timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
				73	defer cancel()
				74
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	75	// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
				76	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	77	if err := r.RequireNormalGitState(timeoutCtx); err != nil {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	78	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	79	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	80	}
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	81	if err := r.RequireNoUncommittedChanges(timeoutCtx); err != nil {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	82	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	83	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	84	}
				85
				86	// Check that the current commit is not the initial commit
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	87	currentCommit, err := r.CurrentCommit(timeoutCtx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	88	if err != nil {
				89	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	90	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	91	}
				92	if r.IsInitialCommit(currentCommit) {
				93	slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	94	return llm.ErrorToolOut(fmt.Errorf("no new commits have been added, nothing to review"))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	95	}
				96
				97	// No matter what failures happen from here out, we will declare this to have been reviewed.
				98	// This should help avoid the model getting blocked by a broken code review tool.
				99	r.reviewed = append(r.reviewed, currentCommit)
				100
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	101	changedFiles, err := r.changedFiles(timeoutCtx, r.sketchBaseRef, currentCommit)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	102	if err != nil {
				103	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	104	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	105	}
				106
				107	// Prepare to analyze before/after for the impacted files.
				108	// We use the current commit to determine what packages exist and are impacted.
				109	// The packages in the initial commit may be different.
				110	// Good enough for now.
				111	// TODO: do better
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	112	allPkgs, err := r.packagesForFiles(timeoutCtx, changedFiles)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	113	if err != nil {
				114	// TODO: log and skip to stuff that doesn't require packages
				115	slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	116	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	117	}
				118	allPkgList := slices.Collect(maps.Keys(allPkgs))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	119
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	120	var errorMessages []string // problems we want the model to address
				121	var infoMessages []string // info the model should consider
				122
Josh Bleecher Snyder	cf19190	2025-06-04 18:18:40 +0000	[diff] [blame]	123	// Run 'go generate' early, so that it can potentially fix tests that would otherwise fail.
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	124	generateChanges, err := r.runGenerate(timeoutCtx, allPkgList)
Josh Bleecher Snyder	cf19190	2025-06-04 18:18:40 +0000	[diff] [blame]	125	if err != nil {
				126	errorMessages = append(errorMessages, err.Error())
				127	}
				128	if len(generateChanges) > 0 {
				129	buf := new(strings.Builder)
				130	buf.WriteString("The following files were changed by running `go generate`:\n\n")
				131	for _, f := range generateChanges {
				132	buf.WriteString(f)
				133	buf.WriteString("\n")
				134	}
				135	buf.WriteString("\nPlease amend your latest git commit with these changes.\n")
				136	infoMessages = append(infoMessages, buf.String())
				137	}
				138
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	139	// Find potentially related files that should also be considered
				140	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
Josh Bleecher Snyder	cf19190	2025-06-04 18:18:40 +0000	[diff] [blame]	141	// TODO: arrange for this to run even in non-Go repos!
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	142	relatedFiles, err := r.findRelatedFiles(timeoutCtx, changedFiles)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	143	if err != nil {
				144	slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
				145	} else {
				146	relatedMsg := r.formatRelatedFiles(relatedFiles)
				147	if relatedMsg != "" {
				148	infoMessages = append(infoMessages, relatedMsg)
				149	}
				150	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	151
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	152	testMsg, err := r.checkTests(timeoutCtx, allPkgList)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	153	if err != nil {
				154	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	155	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	156	}
				157	if testMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	158	errorMessages = append(errorMessages, testMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	159	}
				160
philip.zeyliger	57162e0	2025-06-18 09:59:48 -0700	[diff] [blame]	161	goplsMsg, err := r.checkGopls(timeoutCtx, changedFiles) // includes vet checks
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	162	if err != nil {
				163	slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	164	return llm.ErrorToolOut(err)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	165	}
				166	if goplsMsg != "" {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	167	errorMessages = append(errorMessages, goplsMsg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	168	}
				169
Josh Bleecher Snyder	2dc86b9	2025-04-29 14:11:58 +0000	[diff] [blame]	170	// NOTE: If you change this output format, update the corresponding UI parsing in:
				171	// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	172	buf := new(strings.Builder)
				173	if len(infoMessages) > 0 {
				174	buf.WriteString("# Info\n\n")
				175	buf.WriteString(strings.Join(infoMessages, "\n\n"))
				176	buf.WriteString("\n\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	177	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	178	if len(errorMessages) > 0 {
				179	buf.WriteString("# Errors\n\n")
				180	buf.WriteString(strings.Join(errorMessages, "\n\n"))
				181	buf.WriteString("\n\nPlease fix before proceeding.\n")
				182	}
				183	if buf.Len() == 0 {
				184	buf.WriteString("OK")
				185	}
Josh Bleecher Snyder	43b60b9	2025-07-21 14:57:10 -0700	[diff] [blame]	186	return llm.ToolOut{LLMContent: llm.TextContent(buf.String())}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	187	}
				188
				189	func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
				190	if r.initialWorktree != "" {
				191	return nil
				192	}
				193	tmpDir, err := os.MkdirTemp("", "sketch-codereview-worktree")
				194	if err != nil {
				195	return err
				196	}
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	197	worktreeCmd := exec.CommandContext(ctx, "git", "worktree", "add", "--detach", tmpDir, r.sketchBaseRef)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	198	worktreeCmd.Dir = r.repoRoot
				199	out, err := worktreeCmd.CombinedOutput()
				200	if err != nil {
				201	return fmt.Errorf("unable to create worktree for initial commit: %w\n%s", err, out)
				202	}
				203	r.initialWorktree = tmpDir
				204	return nil
				205	}
				206
				207	func (r *CodeReviewer) checkTests(ctx context.Context, pkgList []string) (string, error) {
Josh Bleecher Snyder	de5f744	2025-05-15 18:32:32 +0000	[diff] [blame]	208	// 'gopls check' covers everything that 'go vet' covers.
				209	// Disabling vet here speeds things up, and allows more precise filtering and reporting.
				210	goTestArgs := []string{"test", "-json", "-v", "-vet=off"}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	211	goTestArgs = append(goTestArgs, pkgList...)
				212
				213	afterTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				214	afterTestCmd.Dir = r.repoRoot
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	215	afterTestOut, _ := afterTestCmd.Output()
				216	// unfortunately, we can't short-circuit here even if all tests pass,
				217	// because we need to check for skipped tests.
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	218
				219	err := r.initializeInitialCommitWorktree(ctx)
				220	if err != nil {
				221	return "", err
				222	}
				223
				224	beforeTestCmd := exec.CommandContext(ctx, "go", goTestArgs...)
				225	beforeTestCmd.Dir = r.initialWorktree
				226	beforeTestOut, _ := beforeTestCmd.Output() // ignore error, interesting info is in the output
				227
				228	// Parse the jsonl test results
				229	beforeResults, beforeParseErr := parseTestResults(beforeTestOut)
				230	if beforeParseErr != nil {
				231	return "", fmt.Errorf("unable to parse test results for initial commit: %w\n%s", beforeParseErr, beforeTestOut)
				232	}
				233	afterResults, afterParseErr := parseTestResults(afterTestOut)
				234	if afterParseErr != nil {
				235	return "", fmt.Errorf("unable to parse test results for current commit: %w\n%s", afterParseErr, afterTestOut)
				236	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	237	testRegressions, err := r.compareTestResults(beforeResults, afterResults)
				238	if err != nil {
				239	return "", fmt.Errorf("failed to compare test results: %w", err)
				240	}
				241	// TODO: better output formatting?
				242	res := r.formatTestRegressions(testRegressions)
				243	return res, nil
				244	}
				245
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	246	// GoplsIssue represents a single issue reported by gopls check
				247	type GoplsIssue struct {
				248	Position string // File position in format "file:line:col-range"
				249	Message string // Description of the issue
				250	}
				251
Josh Bleecher Snyder	de5f744	2025-05-15 18:32:32 +0000	[diff] [blame]	252	// goplsIgnore contains substring patterns for gopls (and vet) diagnostic messages that should be suppressed.
				253	var goplsIgnore = []string{
				254	// these are often just wrong, see https://github.com/golang/go/issues/57059#issuecomment-2884771470
				255	"ends with redundant newline",
				256
				257	// as of May 2025, Claude doesn't understand strings/bytes.SplitSeq well enough to use it
				258	"SplitSeq",
				259	}
				260
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	261	// checkGopls runs gopls check on the provided files in both the current and initial state,
				262	// compares the results, and reports any new issues introduced in the current state.
				263	func (r *CodeReviewer) checkGopls(ctx context.Context, changedFiles []string) (string, error) {
				264	if len(changedFiles) == 0 {
				265	return "", nil // no files to check
				266	}
				267
				268	// Filter out non-Go files as gopls only works on Go files
				269	// and verify they still exist (not deleted)
				270	var goFiles []string
				271	for _, file := range changedFiles {
				272	if !strings.HasSuffix(file, ".go") {
				273	continue // not a Go file
				274	}
				275
				276	// Check if the file still exists (not deleted)
				277	if _, err := os.Stat(file); os.IsNotExist(err) {
				278	continue // file doesn't exist anymore (deleted)
				279	}
				280
				281	goFiles = append(goFiles, file)
				282	}
				283
				284	if len(goFiles) == 0 {
				285	return "", nil // no Go files to check
				286	}
				287
				288	// Run gopls check on the current state
				289	goplsArgs := append([]string{"check"}, goFiles...)
				290
				291	afterGoplsCmd := exec.CommandContext(ctx, "gopls", goplsArgs...)
				292	afterGoplsCmd.Dir = r.repoRoot
				293	afterGoplsOut, err := afterGoplsCmd.CombinedOutput() // gopls returns non-zero if it finds issues
				294	if err != nil {
				295	// Check if the output looks like real gopls issues or if it's just error output
				296	if !looksLikeGoplsIssues(afterGoplsOut) {
				297	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed to run properly", "err", err, "output", string(afterGoplsOut))
				298	return "", nil // Skip rather than failing the entire code review
				299	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	300	}
				301
				302	// Parse the output
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	303	afterIssues := parseGoplsOutput(r.repoRoot, afterGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	304
				305	// If no issues were found, we're done
				306	if len(afterIssues) == 0 {
				307	return "", nil
				308	}
				309
				310	// Gopls detected issues in the current state, check if they existed in the initial state
				311	initErr := r.initializeInitialCommitWorktree(ctx)
				312	if initErr != nil {
				313	return "", err
				314	}
				315
				316	// For each file that exists in the initial commit, run gopls check
				317	var initialFilesToCheck []string
				318	for _, file := range goFiles {
				319	// Get relative path for git operations
				320	relFile, err := filepath.Rel(r.repoRoot, file)
				321	if err != nil {
				322	slog.WarnContext(ctx, "CodeReviewer.checkGopls: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
				323	continue
				324	}
				325
				326	// Check if the file exists in the initial commit
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	327	checkCmd := exec.CommandContext(ctx, "git", "cat-file", "-e", fmt.Sprintf("%s:%s", r.sketchBaseRef, relFile))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	328	checkCmd.Dir = r.repoRoot
				329	if err := checkCmd.Run(); err == nil {
				330	// File exists in initial commit
				331	initialFilePath := filepath.Join(r.initialWorktree, relFile)
				332	initialFilesToCheck = append(initialFilesToCheck, initialFilePath)
				333	}
				334	}
				335
				336	// Run gopls check on the files that existed in the initial commit
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	337	var beforeIssues []GoplsIssue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	338	if len(initialFilesToCheck) > 0 {
				339	beforeGoplsArgs := append([]string{"check"}, initialFilesToCheck...)
				340	beforeGoplsCmd := exec.CommandContext(ctx, "gopls", beforeGoplsArgs...)
				341	beforeGoplsCmd.Dir = r.initialWorktree
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	342	beforeGoplsOut, beforeCmdErr := beforeGoplsCmd.CombinedOutput()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	343	if beforeCmdErr != nil && !looksLikeGoplsIssues(beforeGoplsOut) {
				344	// If gopls fails to run properly on the initial commit, log a warning and continue
				345	// with empty before issues - this will be conservative and report more issues
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	346	slog.WarnContext(ctx, "CodeReviewer.checkGopls: gopls check failed on initial commit", "err", err, "output", string(beforeGoplsOut))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	347	} else {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	348	beforeIssues = parseGoplsOutput(r.initialWorktree, beforeGoplsOut)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	349	}
				350	}
				351
				352	// Find new issues that weren't present in the initial state
				353	goplsRegressions := findGoplsRegressions(beforeIssues, afterIssues)
				354	if len(goplsRegressions) == 0 {
				355	return "", nil // no new issues
				356	}
				357
				358	// Format the results
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	359	return r.formatGoplsRegressions(goplsRegressions), nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	360	}
				361
Josh Bleecher Snyder	de5f744	2025-05-15 18:32:32 +0000	[diff] [blame]	362	// parseGoplsOutput parses the text output from gopls check.
				363	// It drops any that match the patterns in goplsIgnore.
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	364	// Each line has the format: '/path/to/file.go:448:22-26: unused parameter: path'
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	365	func parseGoplsOutput(root string, output []byte) []GoplsIssue {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	366	var issues []GoplsIssue
Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame]	367	for line := range strings.Lines(string(output)) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	368	line = strings.TrimSpace(line)
				369	if line == "" {
				370	continue
				371	}
				372
				373	// Skip lines that look like error messages rather than gopls issues
				374	if strings.HasPrefix(line, "Error:") \|\|
				375	strings.HasPrefix(line, "Failed:") \|\|
				376	strings.HasPrefix(line, "Warning:") \|\|
				377	strings.HasPrefix(line, "gopls:") {
				378	continue
				379	}
				380
				381	// Find the first colon that separates the file path from the line number
				382	firstColonIdx := strings.Index(line, ":")
				383	if firstColonIdx < 0 {
				384	continue // Invalid format
				385	}
				386
				387	// Verify the part before the first colon looks like a file path
				388	potentialPath := line[:firstColonIdx]
				389	if !strings.HasSuffix(potentialPath, ".go") {
				390	continue // Not a Go file path
				391	}
				392
				393	// Find the position of the first message separator ': '
				394	// This separates the position info from the message
				395	messageStart := strings.Index(line, ": ")
				396	if messageStart < 0 \|\| messageStart <= firstColonIdx {
				397	continue // Invalid format
				398	}
				399
				400	// Extract position and message
				401	position := line[:messageStart]
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	402	rel, err := filepath.Rel(root, position)
				403	if err == nil {
				404	position = rel
				405	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	406	message := line[messageStart+2:] // Skip the ': ' separator
				407
				408	// Verify position has the expected format (at least 2 colons for line:col)
				409	colonCount := strings.Count(position, ":")
				410	if colonCount < 2 {
				411	continue // Not enough position information
				412	}
				413
Josh Bleecher Snyder	de5f744	2025-05-15 18:32:32 +0000	[diff] [blame]	414	// Skip diagnostics that match any of our ignored patterns
				415	if shouldIgnoreDiagnostic(message) {
				416	continue
				417	}
				418
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	419	issues = append(issues, GoplsIssue{
				420	Position: position,
				421	Message: message,
				422	})
				423	}
				424
				425	return issues
				426	}
				427
				428	// looksLikeGoplsIssues checks if the output appears to be actual gopls issues
				429	// rather than error messages about gopls itself failing
				430	func looksLikeGoplsIssues(output []byte) bool {
				431	// If output is empty, it's not valid issues
				432	if len(output) == 0 {
				433	return false
				434	}
				435
				436	// Check if output has at least one line that looks like a gopls issue
				437	// A gopls issue looks like: '/path/to/file.go:123:45-67: message'
Josh Bleecher Snyder	f4047bb	2025-05-05 23:02:56 +0000	[diff] [blame]	438	for line := range strings.Lines(string(output)) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	439	line = strings.TrimSpace(line)
				440	if line == "" {
				441	continue
				442	}
				443
				444	// A gopls issue has at least two colons (file path, line number, column)
				445	// and contains a colon followed by a space (separating position from message)
				446	colonCount := strings.Count(line, ":")
				447	hasSeparator := strings.Contains(line, ": ")
				448
				449	if colonCount >= 2 && hasSeparator {
				450	// Check if it starts with a likely file path (ending in .go)
				451	parts := strings.SplitN(line, ":", 2)
				452	if strings.HasSuffix(parts[0], ".go") {
				453	return true
				454	}
				455	}
				456	}
				457	return false
				458	}
				459
				460	// normalizeGoplsPosition extracts just the file path from a position string
				461	func normalizeGoplsPosition(position string) string {
				462	// Extract just the file path by taking everything before the first colon
				463	parts := strings.Split(position, ":")
				464	if len(parts) < 1 {
				465	return position
				466	}
				467	return parts[0]
				468	}
				469
				470	// findGoplsRegressions identifies gopls issues that are new in the after state
				471	func findGoplsRegressions(before, after []GoplsIssue) []GoplsIssue {
				472	var regressions []GoplsIssue
				473
				474	// Build map of before issues for easier lookup
				475	beforeIssueMap := make(map[string]map[string]bool) // file -> message -> exists
				476	for _, issue := range before {
				477	file := normalizeGoplsPosition(issue.Position)
				478	if _, exists := beforeIssueMap[file]; !exists {
				479	beforeIssueMap[file] = make(map[string]bool)
				480	}
				481	// Store both the exact message and the general issue type for fuzzy matching
				482	beforeIssueMap[file][issue.Message] = true
				483
				484	// Extract the general issue type (everything before the first ':' in the message)
				485	generalIssue := issue.Message
				486	if colonIdx := strings.Index(issue.Message, ":"); colonIdx > 0 {
				487	generalIssue = issue.Message[:colonIdx]
				488	}
				489	beforeIssueMap[file][generalIssue] = true
				490	}
				491
				492	// Check each after issue to see if it's new
				493	for _, afterIssue := range after {
				494	file := normalizeGoplsPosition(afterIssue.Position)
				495	isNew := true
				496
				497	if fileIssues, fileExists := beforeIssueMap[file]; fileExists {
				498	// Check for exact message match
				499	if fileIssues[afterIssue.Message] {
				500	isNew = false
				501	} else {
				502	// Check for general issue type match
				503	generalIssue := afterIssue.Message
				504	if colonIdx := strings.Index(afterIssue.Message, ":"); colonIdx > 0 {
				505	generalIssue = afterIssue.Message[:colonIdx]
				506	}
				507	if fileIssues[generalIssue] {
				508	isNew = false
				509	}
				510	}
				511	}
				512
				513	if isNew {
				514	regressions = append(regressions, afterIssue)
				515	}
				516	}
				517
				518	// Sort regressions for deterministic output
				519	slices.SortFunc(regressions, func(a, b GoplsIssue) int {
				520	return strings.Compare(a.Position, b.Position)
				521	})
				522
				523	return regressions
				524	}
				525
				526	// formatGoplsRegressions generates a human-readable summary of gopls check regressions
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	527	func (r *CodeReviewer) formatGoplsRegressions(regressions []GoplsIssue) string {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	528	if len(regressions) == 0 {
				529	return ""
				530	}
				531
				532	var sb strings.Builder
				533	sb.WriteString("Gopls check issues detected:\n\n")
				534
				535	// Format each issue
				536	for i, issue := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	537	sb.WriteString(fmt.Sprintf("%d. %s: %s\n", i+1, filepath.Join(r.repoRoot, issue.Position), issue.Message))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	538	}
				539
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	540	sb.WriteString("\nIMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited.")
				541	sb.WriteString(" Do not change existing code that was not part of your current edits.\n")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	542	return sb.String()
				543	}
				544
				545	func (r *CodeReviewer) HasReviewed(commit string) bool {
				546	return slices.Contains(r.reviewed, commit)
				547	}
				548
				549	func (r *CodeReviewer) IsInitialCommit(commit string) bool {
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	550	return commit == r.sketchBaseRef
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	551	}
				552
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	553	// requireHEADDescendantOfSketchBaseRef returns an error if HEAD is not a descendant of r.initialCommit.
Josh Bleecher Snyder	c72ceb2	2025-05-05 23:30:15 +0000	[diff] [blame]	554	// This serves two purposes:
				555	// - ensures we're not still on the initial commit
				556	// - ensures we're not on a separate branch or upstream somewhere, which would be confusing
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	557	func (r *CodeReviewer) requireHEADDescendantOfSketchBaseRef(ctx context.Context) error {
Josh Bleecher Snyder	c72ceb2	2025-05-05 23:30:15 +0000	[diff] [blame]	558	head, err := r.CurrentCommit(ctx)
				559	if err != nil {
				560	return err
				561	}
				562
				563	// Note: Git's merge-base --is-ancestor checks strict ancestry (i.e., <), so a commit is NOT an ancestor of itself.
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	564	cmd := exec.CommandContext(ctx, "git", "merge-base", "--is-ancestor", r.sketchBaseRef, head)
Josh Bleecher Snyder	c72ceb2	2025-05-05 23:30:15 +0000	[diff] [blame]	565	cmd.Dir = r.repoRoot
				566	err = cmd.Run()
				567	if err != nil {
				568	if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
				569	// Exit code 1 means not an ancestor
				570	return fmt.Errorf("HEAD is not a descendant of the initial commit")
				571	}
				572	return fmt.Errorf("failed to check whether initial commit is ancestor: %w", err)
				573	}
				574	return nil
				575	}
				576
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	577	// packagesForFiles returns maps of packages related to the given files:
				578	// 1. directPkgs: packages that directly contain the changed files
				579	// 2. allPkgs: all packages that might be affected, including downstream packages that depend on the direct packages
				580	// It may include false positives.
				581	// Files must be absolute paths!
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	582	func (r CodeReviewer) packagesForFiles(ctx context.Context, files []string) (allPkgs map[string]packages.Package, err error) {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	583	for _, f := range files {
				584	if !filepath.IsAbs(f) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	585	return nil, fmt.Errorf("path %q is not absolute", f)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	586	}
				587	}
				588	cfg := &packages.Config{
				589	Mode: packages.LoadImports \| packages.NeedEmbedFiles,
				590	Context: ctx,
				591	// Logf: func(msg string, args ...any) {
				592	// slog.DebugContext(ctx, "loading go packages", "msg", fmt.Sprintf(msg, args...))
				593	// },
				594	// TODO: in theory, go.mod might not be in the repo root, and there might be multiple go.mod files.
				595	// We can cross that bridge when we get there.
				596	Dir: r.repoRoot,
				597	Tests: true,
				598	}
				599	universe, err := packages.Load(cfg, "./...")
				600	if err != nil {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	601	return nil, err
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	602	}
				603	// Identify packages that directly contain the changed files
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	604	directPkgs := make(map[string]*packages.Package) // import path -> package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	605	for _, pkg := range universe {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	606	pkgFiles := allFiles(pkg)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	607	for _, file := range files {
				608	if pkgFiles[file] {
				609	// prefer test packages, as they contain strictly more files (right?)
				610	prev := directPkgs[pkg.PkgPath]
				611	if prev == nil \|\| prev.ForTest == "" {
				612	directPkgs[pkg.PkgPath] = pkg
				613	}
				614	}
				615	}
				616	}
				617
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	618	allPkgs = maps.Clone(directPkgs)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	619
				620	// Add packages that depend on the direct packages
				621	addDependentPackages(universe, allPkgs)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	622	return allPkgs, nil
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	623	}
				624
				625	// allFiles returns all files that might be referenced by the package.
				626	// It may contain false positives.
				627	func allFiles(p *packages.Package) map[string]bool {
				628	files := make(map[string]bool)
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	629	// Add files from package info
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	630	add := [][]string{p.GoFiles, p.CompiledGoFiles, p.OtherFiles, p.EmbedFiles, p.IgnoredFiles}
				631	for _, extra := range add {
				632	for _, file := range extra {
				633	files[file] = true
				634	}
				635	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	636	// Add files from testdata directory
				637	testdataDir := filepath.Join(p.Dir, "testdata")
				638	if _, err := os.Stat(testdataDir); err == nil {
				639	fsys := os.DirFS(p.Dir)
				640	fs.WalkDir(fsys, "testdata", func(path string, d fs.DirEntry, err error) error {
				641	if err == nil && !d.IsDir() {
				642	files[filepath.Join(p.Dir, path)] = true
				643	}
				644	return nil
				645	})
				646	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	647	return files
				648	}
				649
				650	// addDependentPackages adds to pkgs all packages from universe
				651	// that directly or indirectly depend on any package already in pkgs.
				652	func addDependentPackages(universe []packages.Package, pkgs map[string]packages.Package) {
				653	for {
				654	changed := false
				655	for _, p := range universe {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	656	if strings.HasSuffix(p.PkgPath, ".test") { // ick, but I don't see another way
				657	// skip test packages
				658	continue
				659	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	660	if _, ok := pkgs[p.PkgPath]; ok {
				661	// already in pkgs
				662	continue
				663	}
				664	for importPath := range p.Imports {
				665	if _, ok := pkgs[importPath]; ok {
				666	// imports a package dependent on pkgs, add it
				667	pkgs[p.PkgPath] = p
				668	changed = true
				669	break
				670	}
				671	}
				672	}
				673	if !changed {
				674	break
				675	}
				676	}
				677	}
				678
				679	// testJSON is a union of BuildEvent and TestEvent
				680	type testJSON struct {
				681	// TestEvent only:
				682	// The Time field holds the time the event happened. It is conventionally omitted
				683	// for cached test results.
				684	Time time.Time `json:"Time"`
				685	// BuildEvent only:
				686	// The ImportPath field gives the package ID of the package being built.
				687	// This matches the Package.ImportPath field of go list -json and the
				688	// TestEvent.FailedBuild field of go test -json. Note that it does not
				689	// match TestEvent.Package.
				690	ImportPath string `json:"ImportPath"` // BuildEvent only
				691	// TestEvent only:
				692	// The Package field, if present, specifies the package being tested. When the
				693	// go command runs parallel tests in -json mode, events from different tests are
				694	// interlaced; the Package field allows readers to separate them.
				695	Package string `json:"Package"`
				696	// Action is used in both BuildEvent and TestEvent.
				697	// It is the key to distinguishing between them.
				698	// BuildEvent:
				699	// build-output or build-fail
				700	// TestEvent:
				701	// start, run, pause, cont, pass, bench, fail, output, skip
				702	Action string `json:"Action"`
				703	// TestEvent only:
				704	// The Test field, if present, specifies the test, example, or benchmark function
				705	// that caused the event. Events for the overall package test do not set Test.
				706	Test string `json:"Test"`
				707	// TestEvent only:
				708	// The Elapsed field is set for "pass" and "fail" events. It gives the time elapsed in seconds
				709	// for the specific test or the overall package test that passed or failed.
				710	Elapsed float64
				711	// TestEvent:
				712	// The Output field is set for Action == "output" and is a portion of the
				713	// test's output (standard output and standard error merged together). The
				714	// output is unmodified except that invalid UTF-8 output from a test is coerced
				715	// into valid UTF-8 by use of replacement characters. With that one exception,
				716	// the concatenation of the Output fields of all output events is the exact output
				717	// of the test execution.
				718	// BuildEvent:
				719	// The Output field is set for Action == "build-output" and is a portion of
				720	// the build's output. The concatenation of the Output fields of all output
				721	// events is the exact output of the build. A single event may contain one
				722	// or more lines of output and there may be more than one output event for
				723	// a given ImportPath. This matches the definition of the TestEvent.Output
				724	// field produced by go test -json.
				725	Output string `json:"Output"`
				726	// TestEvent only:
				727	// The FailedBuild field is set for Action == "fail" if the test failure was caused
				728	// by a build failure. It contains the package ID of the package that failed to
				729	// build. This matches the ImportPath field of the "go list" output, as well as the
				730	// BuildEvent.ImportPath field as emitted by "go build -json".
				731	FailedBuild string `json:"FailedBuild"`
				732	}
				733
				734	// parseTestResults converts test output in JSONL format into a slice of testJSON objects
				735	func parseTestResults(testOutput []byte) ([]testJSON, error) {
				736	var results []testJSON
				737	dec := json.NewDecoder(bytes.NewReader(testOutput))
				738	for {
				739	var event testJSON
				740	if err := dec.Decode(&event); err != nil {
				741	if err == io.EOF {
				742	break
				743	}
				744	return nil, err
				745	}
				746	results = append(results, event)
				747	}
				748	return results, nil
				749	}
				750
				751	// testStatus represents the status of a test in a given commit
				752	type testStatus int
				753
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	754	//go:generate go tool golang.org/x/tools/cmd/stringer -type=testStatus -trimprefix=testStatus
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	755	const (
				756	testStatusUnknown testStatus = iota
				757	testStatusPass
				758	testStatusFail
				759	testStatusBuildFail
				760	testStatusSkip
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	761	testStatusNoTests // no tests exist for this package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	762	)
				763
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	764	// testRegression represents a test that regressed between commits
				765	type testRegression struct {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	766	Package string
				767	Test string // empty for package tests
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	768	BeforeStatus testStatus
				769	AfterStatus testStatus
				770	Output string // failure output in the after state
				771	}
				772
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	773	func (r *testRegression) Source() string {
				774	if r.Test == "" {
				775	return r.Package
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	776	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	777	return fmt.Sprintf("%s.%s", r.Package, r.Test)
				778	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	779
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	780	type packageResult struct {
				781	Status testStatus // overall status for the package
				782	TestStatus map[string]testStatus // name -> status
				783	TestOutput map[string][]string // name -> output parts
				784	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	785
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	786	// collectTestStatuses processes a slice of test events and returns rich status information
				787	func collectTestStatuses(results []testJSON) map[string]*packageResult {
				788	m := make(map[string]*packageResult)
				789
				790	for _, event := range results {
				791	pkg := event.Package
				792	p, ok := m[pkg]
				793	if !ok {
				794	p = new(packageResult)
				795	p.TestStatus = make(map[string]testStatus)
				796	p.TestOutput = make(map[string][]string)
				797	m[pkg] = p
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	798	}
				799
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	800	switch event.Action {
				801	case "output":
				802	p.TestOutput[event.Test] = append(p.TestOutput[event.Test], event.Output)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	803	continue
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	804	case "pass":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	805	if event.Test == "" {
				806	p.Status = testStatusPass
				807	} else {
				808	p.TestStatus[event.Test] = testStatusPass
				809	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	810	case "fail":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	811	if event.Test == "" {
				812	if event.FailedBuild != "" {
				813	p.Status = testStatusBuildFail
				814	} else {
				815	p.Status = testStatusFail
				816	}
				817	} else {
				818	p.TestStatus[event.Test] = testStatusFail
				819	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	820	case "skip":
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	821	if event.Test == "" {
				822	p.Status = testStatusNoTests
				823	} else {
				824	p.TestStatus[event.Test] = testStatusSkip
				825	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	826	}
				827	}
				828
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	829	return m
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	830	}
				831
				832	// compareTestResults identifies tests that have regressed between commits
				833	func (r *CodeReviewer) compareTestResults(beforeResults, afterResults []testJSON) ([]testRegression, error) {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	834	before := collectTestStatuses(beforeResults)
				835	after := collectTestStatuses(afterResults)
				836	var testLevelRegressions []testRegression
				837	var packageLevelRegressions []testRegression
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	838
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	839	afterPkgs := slices.Sorted(maps.Keys(after))
				840	for _, pkg := range afterPkgs {
				841	afterResult := after[pkg]
				842	afterStatus := afterResult.Status
				843	// Can't short-circuit here when tests are passing, because we need to check for skipped tests.
				844	beforeResult, ok := before[pkg]
				845	beforeStatus := testStatusNoTests
				846	if ok {
				847	beforeStatus = beforeResult.Status
				848	}
				849	// If things no longer build, stop at the package level.
				850	// Otherwise, proceed to the test level, so we have more precise information.
				851	if afterStatus == testStatusBuildFail && beforeStatus != testStatusBuildFail {
				852	packageLevelRegressions = append(packageLevelRegressions, testRegression{
				853	Package: pkg,
				854	BeforeStatus: beforeStatus,
				855	AfterStatus: afterStatus,
				856	})
				857	continue
				858	}
				859	tests := slices.Sorted(maps.Keys(afterResult.TestStatus))
				860	for _, test := range tests {
				861	afterStatus := afterResult.TestStatus[test]
				862	switch afterStatus {
				863	case testStatusPass:
				864	continue
				865	case testStatusUnknown:
				866	slog.WarnContext(context.Background(), "unknown test status", "package", pkg, "test", test)
				867	continue
				868	}
banksean	07b7400	2025-06-26 16:05:25 +0000	[diff] [blame]	869	beforeStatus := testStatusUnknown
				870	if beforeResult != nil {
				871	beforeStatus = beforeResult.TestStatus[test]
				872	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	873	if isRegression(beforeStatus, afterStatus) {
				874	testLevelRegressions = append(testLevelRegressions, testRegression{
				875	Package: pkg,
				876	Test: test,
				877	BeforeStatus: beforeStatus,
				878	AfterStatus: afterStatus,
				879	Output: strings.Join(afterResult.TestOutput[test], ""),
				880	})
				881	}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	882	}
				883	}
				884
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	885	// If we have test-level regressions, report only those
				886	// Otherwise, report package-level regressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	887	var regressions []testRegression
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	888	if len(testLevelRegressions) > 0 {
				889	regressions = testLevelRegressions
				890	} else {
				891	regressions = packageLevelRegressions
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	892	}
				893
				894	// Sort regressions for consistent output
				895	slices.SortFunc(regressions, func(a, b testRegression) int {
				896	// First by package
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	897	if c := strings.Compare(a.Package, b.Package); c != 0 {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	898	return c
				899	}
				900	// Then by test name
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	901	return strings.Compare(a.Test, b.Test)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	902	})
				903
				904	return regressions, nil
				905	}
				906
				907	// badnessLevels maps test status to a badness level
				908	// Higher values indicate worse status (more severe issues)
				909	var badnessLevels = map[testStatus]int{
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	910	testStatusBuildFail: 5, // Worst
				911	testStatusFail: 4,
				912	testStatusSkip: 3,
				913	testStatusNoTests: 2,
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	914	testStatusPass: 1,
				915	testStatusUnknown: 0, // Least bad - avoids false positives
				916	}
				917
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	918	// regressionFormatter defines a mapping of before/after state pairs to descriptive messages
				919	type regressionKey struct {
				920	before, after testStatus
				921	}
				922
				923	var regressionMessages = map[regressionKey]string{
				924	{testStatusUnknown, testStatusBuildFail}: "New test has build/vet errors",
				925	{testStatusUnknown, testStatusFail}: "New test is failing",
				926	{testStatusUnknown, testStatusSkip}: "New test is skipped",
				927	{testStatusPass, testStatusBuildFail}: "Was passing, now has build/vet errors",
				928	{testStatusPass, testStatusFail}: "Was passing, now failing",
				929	{testStatusPass, testStatusSkip}: "Was passing, now skipped",
				930	{testStatusNoTests, testStatusBuildFail}: "Previously had no tests, now has build/vet errors",
				931	{testStatusNoTests, testStatusFail}: "Previously had no tests, now has failing tests",
				932	{testStatusNoTests, testStatusSkip}: "Previously had no tests, now has skipped tests",
				933	{testStatusSkip, testStatusBuildFail}: "Was skipped, now has build/vet errors",
				934	{testStatusSkip, testStatusFail}: "Was skipped, now failing",
				935	{testStatusFail, testStatusBuildFail}: "Was failing, now has build/vet errors",
				936	}
				937
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	938	// isRegression determines if a test has regressed based on before and after status
				939	// A regression is defined as an increase in badness level
				940	func isRegression(before, after testStatus) bool {
				941	// Higher badness level means worse status
				942	return badnessLevels[after] > badnessLevels[before]
				943	}
				944
				945	// formatTestRegressions generates a human-readable summary of test regressions
				946	func (r *CodeReviewer) formatTestRegressions(regressions []testRegression) string {
				947	if len(regressions) == 0 {
				948	return ""
				949	}
				950
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	951	buf := new(strings.Builder)
Philip Zeyliger	49edc92	2025-05-14 09:45:45 -0700	[diff] [blame]	952	fmt.Fprintf(buf, "Test regressions detected between initial commit (%s) and HEAD:\n\n", r.sketchBaseRef)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	953
				954	for i, reg := range regressions {
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	955	fmt.Fprintf(buf, "%d: %v: ", i+1, reg.Source())
				956	key := regressionKey{reg.BeforeStatus, reg.AfterStatus}
				957	message, exists := regressionMessages[key]
				958	if !exists {
				959	message = "Regression detected"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	960	}
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	961	fmt.Fprintf(buf, "%s\n", message)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	962	}
				963
Josh Bleecher Snyder	833a0f8	2025-04-24 18:39:36 +0000	[diff] [blame]	964	return buf.String()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	965	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	966
				967	// RelatedFile represents a file historically related to the changed files
				968	type RelatedFile struct {
				969	Path string // Path to the file
				970	Correlation float64 // Correlation score (0.0-1.0)
				971	}
				972
Josh Bleecher Snyder	26b6f9b	2025-07-01 01:41:11 +0000	[diff] [blame]	973	// hashChangedFiles creates a deterministic hash of the changed files set
				974	func (r *CodeReviewer) hashChangedFiles(changedFiles []string) string {
				975	// Sort files for deterministic hashing
				976	sorted := slices.Clone(changedFiles)
				977	slices.Sort(sorted)
				978	h := sha256.New()
				979	enc := json.NewEncoder(h)
				980	err := enc.Encode(sorted)
				981	if err != nil {
				982	panic(err)
				983	}
				984	return hex.EncodeToString(h.Sum(nil))
				985	}
				986
				987	// findRelatedFiles reports files that are historically related to the changed files
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	988	// by analyzing git commit history for co-occurrences.
Josh Bleecher Snyder	26b6f9b	2025-07-01 01:41:11 +0000	[diff] [blame]	989	// This function implements caching to avoid duplicate CPU and LLM processing:
				990	// 1. If the exact same set of changedFiles has been processed before, return nil, nil
				991	// 2. If all related files have been previously reported, return nil, nil
				992	// 3. Otherwise, return the full set of related files and mark them as reported
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	993	func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
Josh Bleecher Snyder	26b6f9b	2025-07-01 01:41:11 +0000	[diff] [blame]	994	cf := r.hashChangedFiles(changedFiles)
				995	if r.processedChangedFileSets[cf] {
				996	return nil, nil
				997	}
				998	r.processedChangedFileSets[cf] = true // don't re-process, even on error
				999
				1000	relatedFiles, err := r.computeRelatedFiles(ctx, changedFiles)
				1001	if err != nil {
				1002	return nil, err
				1003	}
				1004
				1005	hasNew := false
				1006	for _, rf := range relatedFiles {
				1007	if !r.reportedRelatedFiles[rf.Path] {
				1008	hasNew = true
				1009	break
				1010	}
				1011	}
				1012	if !hasNew {
				1013	return nil, nil
				1014	}
				1015
				1016	// We have new file(s) that haven't been called to the LLM's attention yet.
				1017	for _, rf := range relatedFiles {
				1018	r.reportedRelatedFiles[rf.Path] = true
				1019	}
				1020
				1021	return relatedFiles, nil
				1022	}
				1023
				1024	// computeRelatedFiles implements findRelatedFiles, without caching.
				1025	func (r *CodeReviewer) computeRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	1026	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
				1027	if err != nil {
				1028	return nil, fmt.Errorf("failed to get commits touching files: %w", err)
				1029	}
				1030	if len(commits) == 0 {
				1031	return nil, nil
				1032	}
				1033
				1034	relChanged := make(map[string]bool, len(changedFiles))
				1035	for _, file := range changedFiles {
				1036	rel, err := filepath.Rel(r.repoRoot, file)
				1037	if err != nil {
				1038	return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
				1039	}
				1040	relChanged[rel] = true
				1041	}
				1042
				1043	historyFiles := make(map[string]int)
				1044	var historyMu sync.Mutex
				1045
				1046	maxWorkers := runtime.GOMAXPROCS(0)
				1047	semaphore := make(chan bool, maxWorkers)
				1048	var wg sync.WaitGroup
				1049
				1050	for _, commit := range commits {
				1051	wg.Add(1)
				1052	semaphore <- true // acquire
				1053
				1054	go func(commit string) {
				1055	defer wg.Done()
				1056	defer func() { <-semaphore }() // release
				1057	commitFiles, err := r.getFilesInCommit(ctx, commit)
				1058	if err != nil {
				1059	slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
				1060	return
				1061	}
				1062	incr := 0
				1063	for _, file := range commitFiles {
				1064	if relChanged[file] {
				1065	incr++
				1066	}
				1067	}
				1068	if incr == 0 {
				1069	return
				1070	}
				1071	historyMu.Lock()
				1072	defer historyMu.Unlock()
				1073	for _, file := range commitFiles {
				1074	historyFiles[file] += incr
				1075	}
				1076	}(commit)
				1077	}
				1078	wg.Wait()
				1079
				1080	// normalize
				1081	maxCount := 0
				1082	for _, count := range historyFiles {
				1083	maxCount = max(maxCount, count)
				1084	}
				1085	if maxCount == 0 {
				1086	return nil, nil
				1087	}
				1088
				1089	var relatedFiles []RelatedFile
				1090	for file, count := range historyFiles {
				1091	if relChanged[file] {
				1092	// Don't include inputs in the output.
				1093	continue
				1094	}
				1095	correlation := float64(count) / float64(maxCount)
				1096	// Require min correlation to avoid noise
				1097	if correlation >= 0.1 {
Pokey Rule	75f93a1	2025-05-15 13:51:55 +0000	[diff] [blame]	1098	// Check if the file still exists in the repository
				1099	fullPath := filepath.Join(r.repoRoot, file)
				1100	if _, err := os.Stat(fullPath); err == nil {
				1101	relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
				1102	}
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	1103	}
				1104	}
				1105
Josh Bleecher Snyder	a727f70	2025-06-04 18:47:33 -0700	[diff] [blame]	1106	// Highest correlation first, then sort by path.
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	1107	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
Josh Bleecher Snyder	a727f70	2025-06-04 18:47:33 -0700	[diff] [blame]	1108	return cmp.Or(
				1109	-1*cmp.Compare(a.Correlation, b.Correlation),
				1110	cmp.Compare(a.Path, b.Path),
				1111	)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	1112	})
				1113
				1114	// Limit to 1 correlated file per input file.
				1115	// (Arbitrary limit, to be adjusted.)
				1116	maxFiles := len(changedFiles)
				1117	if len(relatedFiles) > maxFiles {
				1118	relatedFiles = relatedFiles[:maxFiles]
				1119	}
				1120
				1121	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
				1122	// and then increase the strength of the wording in the relatedFiles message.
				1123
				1124	return relatedFiles, nil
				1125	}
				1126
				1127	// getCommitsTouchingFiles returns all commits that touch any of the specified files
				1128	func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
				1129	if len(files) == 0 {
				1130	return nil, nil
				1131	}
Josh Bleecher Snyder	d4b1cc7	2025-04-29 13:49:18 +0000	[diff] [blame]	1132	fileArgs := append([]string{"rev-list", "--all", "--date-order", "--max-count=100", "--"}, files...)
Josh Bleecher Snyder	ffecb1e	2025-04-28 18:59:14 +0000	[diff] [blame]	1133	cmd := exec.CommandContext(ctx, "git", fileArgs...)
				1134	cmd.Dir = r.repoRoot
				1135	out, err := cmd.CombinedOutput()
				1136	if err != nil {
				1137	return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
				1138	}
				1139	return nonEmptyTrimmedLines(out), nil
				1140	}
				1141
				1142	// getFilesInCommit returns all files changed in a specific commit
				1143	func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
				1144	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
				1145	cmd.Dir = r.repoRoot
				1146	out, err := cmd.CombinedOutput()
				1147	if err != nil {
				1148	return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
				1149	}
				1150	return nonEmptyTrimmedLines(out), nil
				1151	}
				1152
				1153	func nonEmptyTrimmedLines(b []byte) []string {
				1154	var lines []string
				1155	for line := range strings.Lines(string(b)) {
				1156	line = strings.TrimSpace(line)
				1157	if line != "" {
				1158	lines = append(lines, line)
				1159	}
				1160	}
				1161	return lines
				1162	}
				1163
				1164	// formatRelatedFiles formats the related files list into a human-readable message
				1165	func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
				1166	if len(files) == 0 {
				1167	return ""
				1168	}
				1169
				1170	buf := new(strings.Builder)
				1171
				1172	fmt.Fprintf(buf, "Potentially related files:\n\n")
				1173
				1174	for _, file := range files {
				1175	fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
				1176	}
				1177
				1178	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
				1179	return buf.String()
				1180	}
Josh Bleecher Snyder	de5f744	2025-05-15 18:32:32 +0000	[diff] [blame]	1181
				1182	// shouldIgnoreDiagnostic reports whether a diagnostic message matches any of the patterns in goplsIgnore.
				1183	func shouldIgnoreDiagnostic(message string) bool {
				1184	for _, pattern := range goplsIgnore {
				1185	if strings.Contains(message, pattern) {
				1186	return true
				1187	}
				1188	}
				1189	return false
				1190	}
Josh Bleecher Snyder	6534c7a	2025-07-01 01:48:52 +0000	[diff] [blame]	1191
				1192	// WarmTestCache runs 'go test -c' on relevant packages in the background
				1193	// to warm up the Go build cache. This is intended to be called after patch
				1194	// operations to prepare for future differential testing.
				1195	// It uses the base commit (before state) to warm cache for packages that
				1196	// will likely be tested during code review.
				1197	func (r *CodeReviewer) WarmTestCache(modifiedFile string) {
				1198	if !r.isGoRepository() {
				1199	return
				1200	}
				1201	if !strings.HasSuffix(modifiedFile, ".go") {
				1202	return
				1203	}
				1204
				1205	// Worktree must be created serially
				1206	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
				1207	if err := r.initializeInitialCommitWorktree(ctx); err != nil {
				1208	cancel()
				1209	return
				1210	}
				1211
				1212	go func() {
				1213	defer cancel()
				1214
				1215	if err := r.warmTestCache(ctx, modifiedFile); err != nil {
				1216	slog.DebugContext(ctx, "cache warming failed", "err", err)
				1217	}
				1218	}()
				1219	}
				1220
				1221	func (r *CodeReviewer) warmTestCache(ctx context.Context, modifiedFile string) error {
				1222	allPkgs, err := r.packagesForFiles(ctx, []string{r.absPath(modifiedFile)})
				1223	if err != nil {
				1224	return fmt.Errorf("failed to get packages for files: %w", err)
				1225	}
				1226	if len(allPkgs) == 0 {
				1227	return nil
				1228	}
				1229
				1230	var pkgPaths []string
				1231	r.warmMutex.Lock()
				1232	for pkgPath := range allPkgs {
				1233	if strings.HasSuffix(pkgPath, ".test") {
				1234	continue
				1235	}
				1236	if r.warmedPackages[pkgPath] {
				1237	continue
				1238	}
				1239	// One attempt is enough.
				1240	r.warmedPackages[pkgPath] = true
				1241	pkgPaths = append(pkgPaths, pkgPath)
				1242	}
				1243	r.warmMutex.Unlock()
				1244
				1245	if len(pkgPaths) == 0 {
				1246	return nil
				1247	}
				1248
				1249	// Avoid stressing the machine: max 2 concurrent processes.
				1250	args := []string{"test", "-c", "-p", "2", "-o", "/dev/null"}
				1251	args = append(args, pkgPaths...)
				1252
				1253	cmd := exec.CommandContext(ctx, "go", args...)
				1254	cmd.Dir = r.initialWorktree
				1255	cmd.Stdout = io.Discard
				1256	cmd.Stderr = io.Discard
				1257
				1258	slog.DebugContext(ctx, "warming test cache", "packages", len(pkgPaths), "worktree", r.initialWorktree)
				1259
				1260	start := time.Now()
				1261	// Run the command and ignore errors - this is best effort
				1262	err = cmd.Run()
				1263	slog.DebugContext(ctx, "cache warming complete", "duration", time.Since(start), "error", err)
				1264	return nil
				1265	}