claudetool/codereview: add caching in findRelatedFiles
Primary goal is latency reduction.
Also slightly reduces context usage.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: sa1007d82a5165ab4k
diff --git a/claudetool/codereview/testdata/caching_demo_working.txtar b/claudetool/codereview/testdata/caching_demo_working.txtar
new file mode 100644
index 0000000..b57abc5
--- /dev/null
+++ b/claudetool/codereview/testdata/caching_demo_working.txtar
@@ -0,0 +1,108 @@
+Tests related files caching: the first review reports a related file, and a second review of the same changed file reports nothing further
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- p.go --
+package p
+
+func d() {}
+
+-- .commit --
+Create initial commit
+
+-- a.go --
+package main
+
+func a() {
+ // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and b.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 2
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 2
+}
+
+-- .commit --
+Update a.go and b.go together again
+
+-- a.go --
+package main
+
+func a() {
+ // Update 3
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and c.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 4 - first analysis
+}
+
+-- .commit --
+First analysis
+
+-- .run_test --
+# Info
+
+Potentially related files:
+
+- p.go (30%)
+
+These files have historically changed with the files you have modified. Consider whether they require updates as well.
+
+
+-- a.go --
+package main
+
+func a() {
+ // Update 5 - second analysis (should cache related files)
+}
+
+-- .commit --
+Second analysis (should cache related files)
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/related_files_cache_all_previously_reported.txtar b/claudetool/codereview/testdata/related_files_cache_all_previously_reported.txtar
new file mode 100644
index 0000000..0777729
--- /dev/null
+++ b/claudetool/codereview/testdata/related_files_cache_all_previously_reported.txtar
@@ -0,0 +1,98 @@
+Tests related files caching when all related files have been previously reported
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- d.go --
+package main
+
+func d() {}
+
+-- .commit --
+Create initial commit
+
+-- a.go --
+package main
+
+func a() {
+ // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and b.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 2
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and c.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 3 - first review of a.go; the expected output below reports d.go
+}
+
+-- .commit --
+First code review - reports related files
+
+-- .run_test --
+# Info
+
+Potentially related files:
+
+- d.go (38%)
+
+These files have historically changed with the files you have modified. Consider whether they require updates as well.
+
+
+-- b.go --
+package main
+
+func b() {
+ // Update 2 - different changeset from the first review
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 2 - different changeset from the first review
+}
+
+-- .commit --
+Different changeset, but all related files already reported
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/related_files_cache_demo.txtar b/claudetool/codereview/testdata/related_files_cache_demo.txtar
new file mode 100644
index 0000000..997af17
--- /dev/null
+++ b/claudetool/codereview/testdata/related_files_cache_demo.txtar
@@ -0,0 +1,95 @@
+Tests that related files caching prevents duplicate processing and output
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- .commit --
+Create initial commit
+
+-- a.go --
+package main
+
+func a() {
+ // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and b.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 2
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 2
+}
+
+-- .commit --
+Update a.go and b.go together again
+
+-- a.go --
+package main
+
+func a() {
+ // Update 3
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and c.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 4 - first analysis
+}
+
+-- .commit --
+First analysis of a.go change
+
+-- .run_test --
+OK
+-- a.go --
+package main
+
+func a() {
+ // Update 5 - second analysis (should be cached)
+}
+
+-- .commit --
+Second analysis of a.go change (should be cached)
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/related_files_cache_new_file_in_set.txtar b/claudetool/codereview/testdata/related_files_cache_new_file_in_set.txtar
new file mode 100644
index 0000000..4c32fa9
--- /dev/null
+++ b/claudetool/codereview/testdata/related_files_cache_new_file_in_set.txtar
@@ -0,0 +1,108 @@
+Tests related files caching when some files have been reported but new ones are present
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- d.go --
+package main
+
+func d() {}
+
+-- .commit --
+Create initial commit
+
+-- a.go --
+package main
+
+func a() {
+ // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and b.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 2
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and c.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 3
+}
+
+-- d.go --
+package main
+
+func d() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and d.go together
+
+-- a.go --
+package main
+
+func a() {
+ // Update 4 - first review of a.go; the expected output below is OK (no related files reported)
+}
+
+-- .commit --
+First review of a.go change
+
+-- .run_test --
+OK
+-- b.go --
+package main
+
+func b() {
+ // Update 2
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 2 - different changeset (b.go and c.go) from the first review;
+ // NOTE: the expected output below is OK, i.e. no related files are reported
+}
+
+-- .commit --
+Different changeset with two files, one relationship is new
+
+-- .run_test --
+OK
diff --git a/claudetool/codereview/testdata/related_files_cache_same_changeset.txtar b/claudetool/codereview/testdata/related_files_cache_same_changeset.txtar
new file mode 100644
index 0000000..db640a5
--- /dev/null
+++ b/claudetool/codereview/testdata/related_files_cache_same_changeset.txtar
@@ -0,0 +1,95 @@
+Tests related files caching when the exact same set of changed files is processed multiple times
+
+-- a.go --
+package main
+
+func a() {}
+
+-- b.go --
+package main
+
+func b() {}
+
+-- c.go --
+package main
+
+func c() {}
+
+-- .commit --
+Create initial commit
+
+-- a.go --
+package main
+
+func a() {
+ // Update 1
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 1
+}
+
+-- .commit --
+Update both a.go and b.go together (creates relationship)
+
+-- a.go --
+package main
+
+func a() {
+ // Update 2
+}
+
+-- b.go --
+package main
+
+func b() {
+ // Update 2
+}
+
+-- .commit --
+Update both a.go and b.go together again (strengthens relationship)
+
+-- a.go --
+package main
+
+func a() {
+ // Update 3
+}
+
+-- c.go --
+package main
+
+func c() {
+ // Update 1
+}
+
+-- .commit --
+Update a.go and c.go together (creates another relationship)
+
+-- a.go --
+package main
+
+func a() {
+ // Update 4 - first time processing this exact set
+}
+
+-- .commit --
+First time changing just a.go
+
+-- .run_test --
+OK
+-- a.go --
+package main
+
+func a() {
+ // Update 5 - second time processing this exact same set, should be cached
+}
+
+-- .commit --
+Second time changing just a.go (should be cached)
+
+-- .run_test --
+OK