claudetool: add "related files" detection to codereview tool

diff --git a/claudetool/differential.go b/claudetool/differential.go
index 014fc13..710a09d 100644
--- a/claudetool/differential.go
+++ b/claudetool/differential.go
@@ -2,6 +2,7 @@
 
 import (
 	"bytes"
+	"cmp"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -12,8 +13,10 @@
 	"os"
 	"os/exec"
 	"path/filepath"
+	"runtime"
 	"slices"
 	"strings"
+	"sync"
 	"time"
 
 	"golang.org/x/tools/go/packages"
@@ -78,7 +81,20 @@
 	}
 	allPkgList := slices.Collect(maps.Keys(allPkgs))
 
-	var msgs []string
+	var errorMessages []string // problems we want the model to address
+	var infoMessages []string  // info the model should consider
+
+	// Find potentially related files that should also be considered
+	// TODO: add some caching here, since this depends only on the initial commit and the changed files, not the details of the changes
+	relatedFiles, err := r.findRelatedFiles(ctx, changedFiles)
+	if err != nil {
+		slog.DebugContext(ctx, "CodeReviewer.Run: failed to find related files", "err", err)
+	} else {
+		relatedMsg := r.formatRelatedFiles(relatedFiles)
+		if relatedMsg != "" {
+			infoMessages = append(infoMessages, relatedMsg)
+		}
+	}
 
 	testMsg, err := r.checkTests(ctx, allPkgList)
 	if err != nil {
@@ -86,7 +102,7 @@
 		return "", err
 	}
 	if testMsg != "" {
-		msgs = append(msgs, testMsg)
+		errorMessages = append(errorMessages, testMsg)
 	}
 
 	goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
@@ -95,17 +111,24 @@
 		return "", err
 	}
 	if goplsMsg != "" {
-		msgs = append(msgs, goplsMsg)
+		errorMessages = append(errorMessages, goplsMsg)
 	}
 
-	if len(msgs) == 0 {
-		slog.DebugContext(ctx, "CodeReviewer.Run: no issues found")
-		return "OK", nil
+	buf := new(strings.Builder)
+	if len(infoMessages) > 0 {
+		buf.WriteString("# Info\n\n")
+		buf.WriteString(strings.Join(infoMessages, "\n\n"))
+		buf.WriteString("\n\n")
 	}
-
-	msgs = append(msgs, "Please fix before proceeding.")
-	slog.DebugContext(ctx, "CodeReviewer.Run: found issues", "issues", msgs)
-	return strings.Join(msgs, "\n\n"), nil
+	if len(errorMessages) > 0 {
+		buf.WriteString("# Errors\n\n")
+		buf.WriteString(strings.Join(errorMessages, "\n\n"))
+		buf.WriteString("\n\nPlease fix before proceeding.\n")
+	}
+	if buf.Len() == 0 {
+		buf.WriteString("OK")
+	}
+	return buf.String(), nil
 }
 
 func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {
@@ -845,3 +868,161 @@
 
 	return buf.String()
 }
+
+// RelatedFile represents a file historically related to the changed files
+type RelatedFile struct {
+	Path        string  // Path to the file
+	Correlation float64 // Correlation score (0.0-1.0)
+}
+
+// findRelatedFiles identifies files that are historically related to the changed files
+// by analyzing git commit history for co-occurrences.
+func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
+	commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get commits touching files: %w", err)
+	}
+	if len(commits) == 0 {
+		return nil, nil
+	}
+
+	relChanged := make(map[string]bool, len(changedFiles))
+	for _, file := range changedFiles {
+		rel, err := filepath.Rel(r.repoRoot, file)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get relative path for %s: %w", file, err)
+		}
+		relChanged[rel] = true
+	}
+
+	historyFiles := make(map[string]int)
+	var historyMu sync.Mutex
+
+	maxWorkers := runtime.GOMAXPROCS(0)
+	semaphore := make(chan bool, maxWorkers)
+	var wg sync.WaitGroup
+
+	for _, commit := range commits {
+		wg.Add(1)
+		semaphore <- true // acquire
+
+		go func(commit string) {
+			defer wg.Done()
+			defer func() { <-semaphore }() // release
+			commitFiles, err := r.getFilesInCommit(ctx, commit)
+			if err != nil {
+				slog.WarnContext(ctx, "Failed to get files in commit", "commit", commit, "err", err)
+				return
+			}
+			incr := 0
+			for _, file := range commitFiles {
+				if relChanged[file] {
+					incr++
+				}
+			}
+			if incr == 0 {
+				return
+			}
+			historyMu.Lock()
+			defer historyMu.Unlock()
+			for _, file := range commitFiles {
+				historyFiles[file] += incr
+			}
+		}(commit)
+	}
+	wg.Wait()
+
+	// normalize
+	maxCount := 0
+	for _, count := range historyFiles {
+		maxCount = max(maxCount, count)
+	}
+	if maxCount == 0 {
+		return nil, nil
+	}
+
+	var relatedFiles []RelatedFile
+	for file, count := range historyFiles {
+		if relChanged[file] {
+			// Don't include inputs in the output.
+			continue
+		}
+		correlation := float64(count) / float64(maxCount)
+		// Require min correlation to avoid noise
+		if correlation >= 0.1 {
+			relatedFiles = append(relatedFiles, RelatedFile{Path: file, Correlation: correlation})
+		}
+	}
+
+	// Highest correlation first
+	slices.SortFunc(relatedFiles, func(a, b RelatedFile) int {
+		return -1 * cmp.Compare(a.Correlation, b.Correlation)
+	})
+
+	// Limit to 1 correlated file per input file.
+	// (Arbitrary limit, to be adjusted.)
+	maxFiles := len(changedFiles)
+	if len(relatedFiles) > maxFiles {
+		relatedFiles = relatedFiles[:maxFiles]
+	}
+
+	// TODO: add an LLM in the mix here (like the keyword search tool) to do a filtering pass,
+	// and then increase the strength of the wording in the relatedFiles message.
+
+	return relatedFiles, nil
+}
+
+// getCommitsTouchingFiles returns all commits that touch any of the specified files
+func (r *CodeReviewer) getCommitsTouchingFiles(ctx context.Context, files []string) ([]string, error) {
+	if len(files) == 0 {
+		return nil, nil
+	}
+	fileArgs := append([]string{"rev-list", "--all", "--"}, files...)
+	cmd := exec.CommandContext(ctx, "git", fileArgs...)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get commits: %w\n%s", err, out)
+	}
+	return nonEmptyTrimmedLines(out), nil
+}
+
+// getFilesInCommit returns all files changed in a specific commit
+func (r *CodeReviewer) getFilesInCommit(ctx context.Context, commit string) ([]string, error) {
+	cmd := exec.CommandContext(ctx, "git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit)
+	cmd.Dir = r.repoRoot
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get files in commit: %w\n%s", err, out)
+	}
+	return nonEmptyTrimmedLines(out), nil
+}
+
+func nonEmptyTrimmedLines(b []byte) []string {
+	var lines []string
+	for line := range strings.Lines(string(b)) {
+		line = strings.TrimSpace(line)
+		if line != "" {
+			lines = append(lines, line)
+		}
+	}
+	return lines
+}
+
+// formatRelatedFiles formats the related files list into a human-readable message
+func (r *CodeReviewer) formatRelatedFiles(files []RelatedFile) string {
+	if len(files) == 0 {
+		return ""
+	}
+
+	buf := new(strings.Builder)
+
+	fmt.Fprintf(buf, "Potentially related files:\n\n")
+
+	for _, file := range files {
+		fmt.Fprintf(buf, "- %s (%0.0f%%)\n", file.Path, 100*file.Correlation)
+	}
+
+	fmt.Fprintf(buf, "\nThese files have historically changed with the files you have modified. Consider whether they require updates as well.\n")
+	return buf.String()
+}
diff --git a/claudetool/testdata/add_skipped_test.txtar b/claudetool/testdata/add_skipped_test.txtar
index 95000cc..099ebdd 100644
--- a/claudetool/testdata/add_skipped_test.txtar
+++ b/claudetool/testdata/add_skipped_test.txtar
@@ -19,6 +19,8 @@
 Add skipped test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (2cdfdc5009e364add9a263909472f85eb08480ed) and HEAD:
 
 1: sketch.dev.TestP: New test is skipped
diff --git a/claudetool/testdata/empty_testdir_add_file.txtar b/claudetool/testdata/empty_testdir_add_file.txtar
index b754f3d..7c2be2d 100644
--- a/claudetool/testdata/empty_testdir_add_file.txtar
+++ b/claudetool/testdata/empty_testdir_add_file.txtar
@@ -29,6 +29,8 @@
 Add non-empty file to testdata
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (cdc9ec6dfb8669c11d8aa0df49c72112627784dc) and HEAD:
 
 1: sketch.dev.TestEmptyTestdata: Was passing, now failing
diff --git a/claudetool/testdata/gopls_issues.txtar b/claudetool/testdata/gopls_issues.txtar
index 56f1557..c0250ac 100644
--- a/claudetool/testdata/gopls_issues.txtar
+++ b/claudetool/testdata/gopls_issues.txtar
@@ -15,6 +15,8 @@
 Add file with gopls issues
 
 -- .run_test --
+# Errors
+
 Gopls check issues detected:
 
 1. /PATH/TO/REPO/p.go:5:2-22: unreachable code
diff --git a/claudetool/testdata/gopls_new_issue_with_existing.txtar b/claudetool/testdata/gopls_new_issue_with_existing.txtar
index d47fb02..594ea93 100644
--- a/claudetool/testdata/gopls_new_issue_with_existing.txtar
+++ b/claudetool/testdata/gopls_new_issue_with_existing.txtar
@@ -26,6 +26,8 @@
 Add a new function with a new gopls issue
 
 -- .run_test --
+# Errors
+
 Gopls check issues detected:
 
 1. /PATH/TO/REPO/p.go:9:2-31: unreachable code
diff --git a/claudetool/testdata/mark_test_skipped.txtar b/claudetool/testdata/mark_test_skipped.txtar
index bf887e3..d834ae4 100644
--- a/claudetool/testdata/mark_test_skipped.txtar
+++ b/claudetool/testdata/mark_test_skipped.txtar
@@ -24,6 +24,8 @@
 Skip test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (851ba0628e0e2d93e27e168d6af336f3e4d375c7) and HEAD:
 
 1: sketch.dev.TestP: Was passing, now skipped
diff --git a/claudetool/testdata/multi_commit_review.txtar b/claudetool/testdata/multi_commit_review.txtar
index 2286dee..cbf2464 100644
--- a/claudetool/testdata/multi_commit_review.txtar
+++ b/claudetool/testdata/multi_commit_review.txtar
@@ -36,6 +36,8 @@
 Add yet another file
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (a4fd3c2166b35e02a2cde466880e45aea6e6212e) and HEAD:
 
 1: sketch.dev.TestX: New test is failing
diff --git a/claudetool/testdata/new_test_build_error.txtar b/claudetool/testdata/new_test_build_error.txtar
index 2fe1d34..2405c49 100644
--- a/claudetool/testdata/new_test_build_error.txtar
+++ b/claudetool/testdata/new_test_build_error.txtar
@@ -19,6 +19,8 @@
 Add test with build error
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
 
 1: sketch.dev: Previously had no tests, now has build/vet errors
diff --git a/claudetool/testdata/no_tests_to_failing_tests.txtar b/claudetool/testdata/no_tests_to_failing_tests.txtar
index b0cc956..6161586 100644
--- a/claudetool/testdata/no_tests_to_failing_tests.txtar
+++ b/claudetool/testdata/no_tests_to_failing_tests.txtar
@@ -19,6 +19,8 @@
 Add failing test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (cf75ed70e940ce33a17a70b1894ad053b67731c0) and HEAD:
 
 1: sketch.dev.TestP: New test is failing
diff --git a/claudetool/testdata/passing_to_build_error.txtar b/claudetool/testdata/passing_to_build_error.txtar
index 9558862..6c7d1d0 100644
--- a/claudetool/testdata/passing_to_build_error.txtar
+++ b/claudetool/testdata/passing_to_build_error.txtar
@@ -24,6 +24,8 @@
 Break test with build error
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
 
 1: sketch.dev: Was passing, now has build/vet errors
diff --git a/claudetool/testdata/passing_to_failing.txtar b/claudetool/testdata/passing_to_failing.txtar
index 31a0556..50afae7 100644
--- a/claudetool/testdata/passing_to_failing.txtar
+++ b/claudetool/testdata/passing_to_failing.txtar
@@ -24,6 +24,8 @@
 Break test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (149798f10aaf06bc4700bb62d46ed6d8d88e35ad) and HEAD:
 
 1: sketch.dev.TestP: Was passing, now failing
diff --git a/claudetool/testdata/passing_to_failing_subdir.txtar b/claudetool/testdata/passing_to_failing_subdir.txtar
index 7649554..1211912 100644
--- a/claudetool/testdata/passing_to_failing_subdir.txtar
+++ b/claudetool/testdata/passing_to_failing_subdir.txtar
@@ -24,6 +24,8 @@
 Break test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (8c6c71d0beac5e27b87dfc3f2fd9d274b62c3d3a) and HEAD:
 
 1: sketch.dev/foo.TestP: Was passing, now failing
diff --git a/claudetool/testdata/related_files_cooccurrence.txtar b/claudetool/testdata/related_files_cooccurrence.txtar
new file mode 100644
index 0000000..b15f325
--- /dev/null
+++ b/claudetool/testdata/related_files_cooccurrence.txtar
@@ -0,0 +1,103 @@
+Tests related files identification based on historical co-occurrence
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- p.go --
+package p
+
+func d() {}
+
+-- .commit --
+Add functions to a.go and b.go
+
+-- a.go --
+package main
+
+func a() {
+    // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+    // Update 1
+}
+
+-- .commit --
+Add functions to a.go and b.go again
+
+-- a.go --
+package main
+
+func a() {
+    // Update 2
+}
+
+-- b.go --
+package main
+
+func b() {
+    // Update 2
+}
+
+-- .commit --
+Add functions to a.go and c.go
+
+-- a.go --
+package main
+
+func a() {
+    // Update 3
+}
+
+-- c.go --
+package main
+
+func c() {
+    // Update 1
+}
+
+-- .commit --
+Update file a.go only
+
+-- a.go --
+package main
+
+func a() {
+    x := 42 // new gopls issue to view mixed info/error lines
+}
+
+-- .run_test --
+# Info
+
+Potentially related files:
+
+- p.go (33%)
+
+These files have historically changed with the files you have modified. Consider whether they require updates as well.
+
+
+# Errors
+
+Gopls check issues detected:
+
+1. /PATH/TO/REPO/a.go:4:5-6: declared and not used: x
+
+IMPORTANT: Only fix new gopls check issues in parts of the code that you have already edited. Do not change existing code that was not part of your current edits.
+
+
+Please fix before proceeding.
diff --git a/claudetool/testdata/skipped_to_build_error.txtar b/claudetool/testdata/skipped_to_build_error.txtar
index f648572..671b53b 100644
--- a/claudetool/testdata/skipped_to_build_error.txtar
+++ b/claudetool/testdata/skipped_to_build_error.txtar
@@ -25,6 +25,8 @@
 Change skipped test to test with build error
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
 
 1: sketch.dev: Was passing, now has build/vet errors
diff --git a/claudetool/testdata/skipped_to_failing.txtar b/claudetool/testdata/skipped_to_failing.txtar
index 50348d2..98935e8 100644
--- a/claudetool/testdata/skipped_to_failing.txtar
+++ b/claudetool/testdata/skipped_to_failing.txtar
@@ -25,6 +25,8 @@
 Change skipped test to failing test
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (3635353dfefb2fa652728318567209ccfc4aba42) and HEAD:
 
 1: sketch.dev.TestP: Was skipped, now failing
diff --git a/claudetool/testdata/vet_error_test.txtar b/claudetool/testdata/vet_error_test.txtar
index 8c9dc64..0163a81 100644
--- a/claudetool/testdata/vet_error_test.txtar
+++ b/claudetool/testdata/vet_error_test.txtar
@@ -21,6 +21,8 @@
 Add vet error
 
 -- .run_test --
+# Errors
+
 Test regressions detected between initial commit (656e4d573cb77a30ebce29ebb974533ece282946) and HEAD:
 
 1: sketch.dev: Previously had no tests, now has build/vet errors