claudetool/codereview: add caching in findRelatedFiles
Primary goal is latency reduction.
Also slightly reduces context usage.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: sa1007d82a5165ab4k
diff --git a/claudetool/codereview/differential.go b/claudetool/codereview/differential.go
index ff25b22..77dde85 100644
--- a/claudetool/codereview/differential.go
+++ b/claudetool/codereview/differential.go
@@ -4,6 +4,8 @@
"bytes"
"cmp"
"context"
+ "crypto/sha256"
+ "encoding/hex"
"encoding/json"
"fmt"
"io"
@@ -968,9 +970,59 @@
Correlation float64 // Correlation score (0.0-1.0)
}
-// findRelatedFiles identifies files that are historically related to the changed files
+// hashChangedFiles creates a deterministic hash of the changed files set
+func (r *CodeReviewer) hashChangedFiles(changedFiles []string) string {
+ // Sort files for deterministic hashing
+ sorted := slices.Clone(changedFiles)
+ slices.Sort(sorted)
+ h := sha256.New()
+ enc := json.NewEncoder(h)
+ err := enc.Encode(sorted)
+ if err != nil {
+ panic(err)
+ }
+ return hex.EncodeToString(h.Sum(nil))
+}
+
+// findRelatedFiles reports files that are historically related to the changed files
// by analyzing git commit history for co-occurrences.
+// This function implements caching to avoid duplicate CPU and LLM processing:
+// 1. If the exact same set of changedFiles has been processed before, return nil, nil
+// 2. If all related files have been previously reported, return nil, nil
+// 3. Otherwise, return the full set of related files and mark them as reported
func (r *CodeReviewer) findRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
+ cf := r.hashChangedFiles(changedFiles)
+ if r.processedChangedFileSets[cf] {
+ return nil, nil
+ }
+ r.processedChangedFileSets[cf] = true // don't re-process, even on error
+
+ relatedFiles, err := r.computeRelatedFiles(ctx, changedFiles)
+ if err != nil {
+ return nil, err
+ }
+
+ hasNew := false
+ for _, rf := range relatedFiles {
+ if !r.reportedRelatedFiles[rf.Path] {
+ hasNew = true
+ break
+ }
+ }
+ if !hasNew {
+ return nil, nil
+ }
+
+ // We have new file(s) that haven't been called to the LLM's attention yet.
+ for _, rf := range relatedFiles {
+ r.reportedRelatedFiles[rf.Path] = true
+ }
+
+ return relatedFiles, nil
+}
+
+// computeRelatedFiles implements findRelatedFiles, without caching.
+func (r *CodeReviewer) computeRelatedFiles(ctx context.Context, changedFiles []string) ([]RelatedFile, error) {
commits, err := r.getCommitsTouchingFiles(ctx, changedFiles)
if err != nil {
return nil, fmt.Errorf("failed to get commits touching files: %w", err)