blob: 411195a7f1c92fae09b4b913df3c7381deb046cf [file] [log] [blame]
package codereview
import (
"bytes"
"context"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
"slices"
"strings"
"sync"
"sketch.dev/claudetool"
)
// A CodeReviewer manages quality checks.
type CodeReviewer struct {
repoRoot string
sketchBaseRef string
initialStatus []fileStatus // git status of files at initial commit, absolute paths
reviewed []string // history of all commits which have been reviewed
initialWorktree string // git worktree at initial commit, absolute path
// "Related files" caching
processedChangedFileSets map[string]bool // hash of sorted changedFiles -> processed
reportedRelatedFiles map[string]bool // file path -> reported
// Pre-warming of Go build/test cache
warmMutex sync.Mutex // protects warmedPackages map
warmedPackages map[string]bool // packages that have been cache warmed
}
func NewCodeReviewer(ctx context.Context, repoRoot, sketchBaseRef string) (*CodeReviewer, error) {
r := &CodeReviewer{
repoRoot: repoRoot,
sketchBaseRef: sketchBaseRef,
processedChangedFileSets: make(map[string]bool),
reportedRelatedFiles: make(map[string]bool),
warmedPackages: make(map[string]bool),
}
if r.repoRoot == "" {
return nil, fmt.Errorf("NewCodeReviewer: repoRoot must be non-empty")
}
if r.sketchBaseRef == "" {
return nil, fmt.Errorf("NewCodeReviewer: sketchBaseRef must be non-empty")
}
// Confirm that root is in fact the git repo root.
root, err := claudetool.FindRepoRoot(r.repoRoot)
if err != nil {
return nil, err
}
if root != r.repoRoot {
return nil, fmt.Errorf("NewCodeReviewer: repoRoot=%q but git repo root is %q", r.repoRoot, root)
}
// Get an initial list of dirty and untracked files.
// We'll filter them out later when deciding whether the worktree is clean.
status, err := r.repoStatus(ctx)
if err != nil {
return nil, err
}
r.initialStatus = status
return r, nil
}
// autoformat formats all files changed in HEAD.
// It returns a list of all files that were formatted.
// It is best-effort only.
func (r *CodeReviewer) autoformat(ctx context.Context) []string {
// Refuse to format if initial commit is not an ancestor of HEAD
err := r.requireHEADDescendantOfSketchBaseRef(ctx)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat refusing to format", "err", err)
return nil
}
head, err := r.CurrentCommit(ctx)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get current commit", "err", err)
return nil
}
parent, err := r.ResolveCommit(ctx, "HEAD^1")
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get parent commit", "err", err)
return nil
}
// Retrieve a list of all files changed
// TODO: instead of one git diff --name-only and then N --name-status, do one --name-status.
changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, head)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get changed files", "err", err)
return nil
}
// General strategy: For all changed files,
// run the strictest formatter that passes on the original version.
// TODO: add non-Go formatters?
// TODO: at a minimum, for common file types, ensure trailing newlines and maybe trim trailing whitespace per line?
var fmtFiles []string
for _, file := range changedFiles {
if !strings.HasSuffix(file, ".go") {
continue
}
fileStatus, err := r.gitFileStatus(ctx, file)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file status", "file", file, "err", err)
continue
}
if fileStatus == "D" { // deleted, nothing to format
continue
}
code, err := r.getFileContentAtCommit(ctx, file, head)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at head", "file", file, "err", err)
continue
}
if claudetool.IsAutogeneratedGoFile(code) { // leave autogenerated files alone
continue
}
onDisk, err := os.ReadFile(file)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to read file", "file", file, "err", err)
continue
}
if !bytes.Equal(code, onDisk) { // file has been modified since HEAD
slog.WarnContext(ctx, "CodeReviewer.Autoformat file modified since HEAD", "file", file, "err", err)
continue
}
var formatterToUse string
if fileStatus == "A" {
formatterToUse = "gofumpt" // newly added, so we can format how we please: use gofumpt
} else {
prev, err := r.getFileContentAtCommit(ctx, file, parent)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to get file content at parent", "file", file, "err", err)
continue
}
formatterToUse = r.pickFormatter(ctx, prev) // pick the strictest formatter that passes on the original version
}
// Apply the chosen formatter to the current file
newCode := r.runFormatter(ctx, formatterToUse, code)
if newCode == nil { // no changes made
continue
}
// write to disk
if err := os.WriteFile(file, newCode, 0o600); err != nil {
slog.WarnContext(ctx, "CodeReviewer.Autoformat unable to write formatted file", "file", file, "err", err)
continue
}
fmtFiles = append(fmtFiles, file)
}
return fmtFiles
}
// RequireNormalGitState checks that the git repo state is pretty normal.
func (r *CodeReviewer) RequireNormalGitState(_ context.Context) error {
rebaseDirs := []string{"rebase-merge", "rebase-apply"}
for _, dir := range rebaseDirs {
_, err := os.Stat(filepath.Join(r.repoRoot, dir))
if err == nil {
return fmt.Errorf("git repo is not clean: rebase in progress")
}
}
filesReason := map[string]string{
"MERGE_HEAD": "merge is in progress",
"CHERRY_PICK_HEAD": "cherry-pick is in progress",
"REVERT_HEAD": "revert is in progress",
"BISECT_LOG": "bisect is in progress",
}
for file, reason := range filesReason {
_, err := os.Stat(filepath.Join(r.repoRoot, file))
if err == nil {
return fmt.Errorf("git repo is not clean: %s", reason)
}
}
return nil
}
func (r *CodeReviewer) RequireNoUncommittedChanges(ctx context.Context) error {
// Check that there are no uncommitted changes, whether staged or not.
// (Changes in r.initialStatus are OK, no other changes are.)
statuses, err := r.repoStatus(ctx)
if err != nil {
return fmt.Errorf("unable to get repo status: %w", err)
}
uncommitted := new(strings.Builder)
for _, status := range statuses {
if !statusesContainFile(r.initialStatus, status.Path) {
fmt.Fprintf(uncommitted, "%s %s\n", status.RawStatus, status.Path)
}
}
if uncommitted.Len() > 0 {
return fmt.Errorf("uncommitted changes in repo, please commit relevant changes and revert/delete others:\n%s", uncommitted.String())
}
return nil
}
func statusesContainFile(statuses []fileStatus, file string) bool {
for _, s := range statuses {
if s.Path == file {
return true
}
}
return false
}
// parseGitStatusLine parses a single line from git status --porcelain output.
// Returns the file path and status, or empty strings if the line should be ignored.
func parseGitStatusLine(line string) (path, status string) {
if len(line) <= 3 {
return "", "" // empty line or invalid format
}
return strings.TrimSpace(line[3:]), line[:2]
}
type fileStatus struct {
Path string
RawStatus string // always 2 characters
}
func (r *CodeReviewer) repoStatus(ctx context.Context) ([]fileStatus, error) {
// Run git status --porcelain, split into lines
cmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("failed to run git status: %w\n%s", err, out)
}
var statuses []fileStatus
for line := range strings.Lines(string(out)) {
path, status := parseGitStatusLine(line)
if path == "" {
continue // empty or invalid line
}
absPath := r.absPath(path)
statuses = append(statuses, fileStatus{Path: absPath, RawStatus: status})
}
return statuses, nil
}
// CurrentCommit retrieves the current git commit hash
func (r *CodeReviewer) CurrentCommit(ctx context.Context) (string, error) {
return r.ResolveCommit(ctx, "HEAD")
}
func (r *CodeReviewer) ResolveCommit(ctx context.Context, ref string) (string, error) {
cmd := exec.CommandContext(ctx, "git", "rev-parse", ref)
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return "", fmt.Errorf("failed to get current commit hash: %w\n%s", err, out)
}
return strings.TrimSpace(string(out)), nil
}
func (r *CodeReviewer) absPath(relPath string) string {
if filepath.IsAbs(relPath) {
return relPath
}
return filepath.Clean(filepath.Join(r.repoRoot, relPath))
}
// gitFileStatus returns the status of a file (A for added, M for modified, D for deleted, etc.)
func (r *CodeReviewer) gitFileStatus(ctx context.Context, file string) (string, error) {
cmd := exec.CommandContext(ctx, "git", "diff", "--name-status", r.sketchBaseRef, "HEAD", "--", file)
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return "", fmt.Errorf("failed to get file status: %w\n%s", err, out)
}
status := strings.TrimSpace(string(out))
if status == "" {
return "", fmt.Errorf("no status found for file: %s", file)
}
return string(status[0]), nil
}
// getFileContentAtCommit retrieves file content at a specific commit
func (r *CodeReviewer) getFileContentAtCommit(ctx context.Context, file, commit string) ([]byte, error) {
relFile, err := filepath.Rel(r.repoRoot, file)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.getFileContentAtCommit: failed to get relative path", "repo_root", r.repoRoot, "file", file, "err", err)
file = relFile
}
cmd := exec.CommandContext(ctx, "git", "show", fmt.Sprintf("%s:%s", commit, relFile))
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("failed to get file content at commit %s: %w\n%s", commit, err, out)
}
return out, nil
}
// runFormatter runs the specified formatter on a file and returns the results.
// A nil result indicates that the file is unchanged, or that an error occurred.
func (r *CodeReviewer) runFormatter(ctx context.Context, formatter string, content []byte) []byte {
if formatter == "" {
return nil // no formatter
}
// Run the formatter and capture the output
cmd := exec.CommandContext(ctx, formatter)
cmd.Dir = r.repoRoot
cmd.Stdin = bytes.NewReader(content)
out, err := cmd.CombinedOutput()
if err != nil {
// probably a parse error, err on the side of safety
return nil
}
if bytes.Equal(content, out) {
return nil // no changes
}
return out
}
// formatterWouldChange reports whether a formatter would make changes to the content.
// If the contents are invalid, it returns false.
// It works by piping the content to the formatter with the -l flag.
func (r *CodeReviewer) formatterWouldChange(ctx context.Context, formatter string, content []byte) bool {
cmd := exec.CommandContext(ctx, formatter, "-l")
cmd.Dir = r.repoRoot
cmd.Stdin = bytes.NewReader(content)
out, err := cmd.CombinedOutput()
if err != nil {
// probably a parse error, err on the side of safety
return false
}
// If the output is empty, the file passes the formatter
// If the output contains "<standard input>", the file would be changed
return len(bytes.TrimSpace(out)) > 0
}
// pickFormatter picks a formatter to use for code.
// If something goes wrong, it recommends no formatter (empty string).
func (r *CodeReviewer) pickFormatter(ctx context.Context, code []byte) string {
// Test each formatter from strictest to least strict.
// Keep the first one that doesn't make changes.
formatters := []string{"gofumpt", "goimports", "gofmt"}
for _, formatter := range formatters {
if r.formatterWouldChange(ctx, formatter, code) {
continue
}
return formatter
}
return "" // no safe formatter found
}
// changedFiles retrieves a list of all files changed between two commits
func (r *CodeReviewer) changedFiles(ctx context.Context, fromCommit, toCommit string) ([]string, error) {
cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", fromCommit, toCommit)
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("failed to get changed files: %w\n%s", err, out)
}
var files []string
for line := range strings.Lines(string(out)) {
line = strings.TrimSpace(line)
if len(line) == 0 {
continue
}
path := r.absPath(line)
if statusesContainFile(r.initialStatus, path) {
continue
}
files = append(files, path)
}
return files, nil
}
// runGenerate runs go generate on all packages and returns a list of files changed.
// Errors returned will be reported to the LLM.
func (r *CodeReviewer) runGenerate(ctx context.Context, packages []string) ([]string, error) {
if len(packages) == 0 {
return nil, nil
}
args := []string{"generate"}
for _, pkg := range packages {
// Sigh. Working around test packages is a PITA.
if strings.HasSuffix(pkg, ".test") || strings.HasSuffix(pkg, "_test") {
continue
}
args = append(args, pkg)
}
gen := exec.CommandContext(ctx, "go", args...)
gen.Dir = r.repoRoot
out, err := gen.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("$ go %s\n%s", strings.Join(args, " "), out)
}
status := exec.CommandContext(ctx, "git", "status", "--porcelain")
status.Dir = r.repoRoot
statusOut, err := status.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("unable to get git status: %w", err)
}
var changed []string
for line := range strings.Lines(string(statusOut)) {
path, _ := parseGitStatusLine(line)
if path == "" {
continue
}
absPath := filepath.Join(r.repoRoot, path)
if statusesContainFile(r.initialStatus, absPath) {
continue
}
changed = append(changed, absPath)
}
return changed, nil
}
// isGoRepository checks if the repository contains a go.mod file
// TODO: check in subdirs?
func (r *CodeReviewer) isGoRepository() bool {
_, err := os.Stat(filepath.Join(r.repoRoot, "go.mod"))
return err == nil
}
// ModTidy runs go mod tidy if go module files have changed.
// Returns a list of files changed by go mod tidy (empty if none).
func (r *CodeReviewer) ModTidy(ctx context.Context) ([]string, error) {
err := r.requireHEADDescendantOfSketchBaseRef(ctx)
if err != nil {
return nil, fmt.Errorf("cannot run ModTidy: %w", err)
}
// Check if any go.mod, go.sum, etc. files have changed
currentCommit, err := r.CurrentCommit(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get current commit: %w", err)
}
changedFiles, err := r.changedFiles(ctx, r.sketchBaseRef, currentCommit)
if err != nil {
return nil, fmt.Errorf("failed to get changed files: %w", err)
}
// Check if any of the changed files are go module files
goModsChanged := false
for _, file := range changedFiles {
if isGoModFile(file) {
goModsChanged = true
break
}
}
if !goModsChanged {
// No go module files changed, so don't run go mod tidy
return nil, nil
}
// Run go mod tidy
cmd := exec.CommandContext(ctx, "go", "mod", "tidy")
cmd.Dir = r.repoRoot
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("go mod tidy failed: %w\n%s", err, out)
}
// Check which files were changed by go mod tidy
statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
statusCmd.Dir = r.repoRoot
statusOut, err := statusCmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("unable to get git status: %w", err)
}
var changedByTidy []string
for line := range strings.Lines(string(statusOut)) {
file, _ := parseGitStatusLine(line)
if file == "" {
continue // empty or invalid line
}
if !isGoModFile(file) {
continue
}
path := filepath.Join(r.repoRoot, file)
changedByTidy = append(changedByTidy, path)
}
return changedByTidy, nil
}
// RunMechanicalChecks runs all mechanical checks and returns a message describing any changes made.
func (r *CodeReviewer) RunMechanicalChecks(ctx context.Context) string {
var actions []string
changed := r.autoformat(ctx)
if len(changed) > 0 {
actions = append(actions, "autoformatters")
}
// Run go mod tidy (only if go module files have changed)
tidyChanges, err := r.ModTidy(ctx)
if err != nil {
slog.WarnContext(ctx, "CodeReviewer.RunMechanicalChecks: ModTidy failed", "err", err)
}
if len(tidyChanges) > 0 {
changed = append(changed, tidyChanges...)
actions = append(actions, "`go mod tidy`")
}
if len(changed) == 0 {
return ""
}
slices.Sort(changed)
msg := fmt.Sprintf(`I ran %s, which updated these files:
%s
Please amend your latest git commit with these changes and then continue with what you were doing.`,
strings.Join(actions, " and "),
strings.Join(changed, "\n"),
)
return msg
}
// isGoModFile returns true if the file is a Go module file (go.mod, go.sum, etc.)
func isGoModFile(path string) bool {
basename := filepath.Base(path)
return strings.HasPrefix(basename, "go.")
}