git_tools: Implement git diff and show API

Added the git_tools package, which provides structured access to the output of git diff (raw format), git show, and the recent git log. These functions are exposed via HTTP endpoints in loophttp.

This is a stepping stone to a better diff view.

Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: se75f0a1b2c3d4e5k
diff --git a/git_tools/git_tools.go b/git_tools/git_tools.go
new file mode 100644
index 0000000..f8a1807
--- /dev/null
+++ b/git_tools/git_tools.go
@@ -0,0 +1,230 @@
+// Package git_tools provides utilities for interacting with Git repositories.
+package git_tools
+
+import (
+	"bufio"
+	"fmt"
+	"os/exec"
+	"strings"
+)
+
+// DiffFile describes one record of `git diff --raw` output: a single path
+// together with the file modes and blob hashes on both sides of the diff.
+type DiffFile struct {
+	Path    string `json:"path"`     // Path of the file as reported by git
+	OldMode string `json:"old_mode"` // File mode on the "from" side
+	NewMode string `json:"new_mode"` // File mode on the "to" side
+	OldHash string `json:"old_hash"` // Blob hash on the "from" side
+	NewHash string `json:"new_hash"` // Blob hash on the "to" side
+	Status  string `json:"status"`   // A=added, M=modified, D=deleted, etc.
+}
+
+// GitRawDiff returns a structured representation of the Git diff between two
+// commits or references.
+//
+// repoDir is the repository to run git in; from and to are the revisions to
+// compare. An error is returned if either revision starts with '-' or if git
+// exits unsuccessfully; in the latter case the error includes git's stderr.
+func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
+	// A revision beginning with '-' would be interpreted by git as an option
+	// (for example --output=<file>), so refuse it before running anything.
+	for _, rev := range []string{from, to} {
+		if strings.HasPrefix(rev, "-") {
+			return nil, fmt.Errorf("invalid revision %q: must not start with '-'", rev)
+		}
+	}
+
+	// --abbrev=40 requests full hashes. core.quotePath=false keeps non-ASCII
+	// paths verbatim instead of octal-escaped and quoted. The trailing "--"
+	// makes git treat from/to as revisions, never as paths.
+	cmd := exec.Command("git", "-C", repoDir, "-c", "core.quotePath=false",
+		"diff", "--raw", "--abbrev=40", from, to, "--")
+
+	// Capture stderr separately so warnings never end up in the parsed output.
+	var stderr strings.Builder
+	cmd.Stderr = &stderr
+	out, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("error executing git diff: %w - %s", err, stderr.String())
+	}
+
+	// Parse the raw diff output into structured format
+	return parseRawDiff(string(out))
+}
+
+// GitShow returns the result of git show for a specific commit hash
+func GitShow(repoDir, hash string) (string, error) {
+	cmd := exec.Command("git", "-C", repoDir, "show", hash)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("error executing git show: %w - %s", err, string(out))
+	}
+	return string(out), nil
+}
+
+// parseRawDiff converts `git diff --raw` output into a slice of DiffFile.
+//
+// Each record is expected to look like
+//
+//	:oldmode newmode oldhash newhash status<TAB>path[<TAB>newpath]
+//
+// for example
+//
+//	:000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A	git_tools/git_tools.go
+//
+// Lines that do not start with ':', have no tab-separated path, or carry fewer
+// than five metadata fields are skipped. When a record lists two paths (renames
+// and copies), Path is set to the last one, i.e. the destination. Paths are
+// returned exactly as git printed them. An error is returned only if reading
+// the input fails.
+func parseRawDiff(diffOutput string) ([]DiffFile, error) {
+	var files []DiffFile
+
+	// Process diff output line by line
+	scanner := bufio.NewScanner(strings.NewReader(diffOutput))
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, ":") {
+			continue
+		}
+
+		// The first tab separates the metadata from the path(s). Splitting
+		// there, rather than searching for the status text, cannot be fooled
+		// by the line's other contents and keeps spaces in file names intact.
+		tab := strings.IndexByte(line, '\t')
+		if tab < 0 {
+			continue // No path present, skip this line
+		}
+
+		meta := strings.Fields(line[1:tab]) // Skip the leading colon
+		if len(meta) < 5 {
+			continue // Not enough parts, skip this line
+		}
+
+		// Renames and copies list "src<TAB>dst"; report the destination.
+		paths := strings.Split(line[tab+1:], "\t")
+
+		files = append(files, DiffFile{
+			Path:    paths[len(paths)-1],
+			OldMode: meta[0],
+			NewMode: meta[1],
+			OldHash: meta[2],
+			NewHash: meta[3],
+			Status:  meta[4],
+		})
+	}
+	// A scanner failure (such as an over-long line) would otherwise silently
+	// truncate the result.
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("error reading git diff output: %w", err)
+	}
+
+	return files, nil
+}
+
+// LogEntry represents a single commit as parsed from git log output.
+type LogEntry struct {
+	Hash    string   `json:"hash"`    // The full commit hash (%H in the log format)
+	Refs    []string `json:"refs"`    // References (branches, tags) pointing to this commit, parsed from %d
+	Subject string   `json:"subject"` // The commit subject line (%s in the log format)
+}
+
+// GitRecentLog returns the recent commit log leading up to HEAD.
+//
+// The starting point handed to getGitLog is the merge-base of HEAD and
+// initialCommitHash. If the merge-base cannot be determined (the command
+// fails or prints nothing), initialCommitHash itself is used instead.
+//
+// An error is returned if initialCommitHash is empty or starts with '-', or
+// if retrieving the log fails.
+func GitRecentLog(repoDir string, initialCommitHash string) ([]LogEntry, error) {
+	// Validate input
+	if initialCommitHash == "" {
+		return nil, fmt.Errorf("initial commit hash must be provided")
+	}
+	// A value beginning with '-' would be interpreted by git as an option.
+	if strings.HasPrefix(initialCommitHash, "-") {
+		return nil, fmt.Errorf("invalid initial commit hash %q: must not start with '-'", initialCommitHash)
+	}
+
+	// Default to the caller's commit; upgrade to the merge-base when available.
+	fromCommit := initialCommitHash
+
+	// Output reads stdout only, so anything git writes to stderr cannot end up
+	// inside the hash.
+	mergeBase, err := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash).Output()
+	if err == nil {
+		if hash := strings.TrimSpace(string(mergeBase)); hash != "" {
+			fromCommit = hash
+		}
+	}
+
+	return getGitLog(repoDir, fromCommit)
+}
+
+// getGitLog gets the git log with the specified format using the provided fromCommit
+func getGitLog(repoDir string, fromCommit string) ([]LogEntry, error) {
+	// Check if fromCommit~10 exists (10 commits before fromCommit)
+	checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+"~10")
+	if err := checkCmd.Run(); err != nil {
+		// If fromCommit~10 doesn't exist, use just fromCommit..HEAD as the range
+		cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"..HEAD")
+		out, err := cmd.CombinedOutput()
+		if err != nil {
+			return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
+		}
+		return parseGitLog(string(out))
+	}
+
+	// Use fromCommit~10..HEAD range with the specified format for easy parsing
+	cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"~10..HEAD")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
+	}
+
+	return parseGitLog(string(out))
+}
+
+// parseGitLog parses git log output in which every line has the form
+// "hash NUL subject NUL decoration" (--pretty=%H%x00%s%x00%d).
+//
+// Lines that do not split into exactly three NUL-separated fields are
+// skipped. The decoration field is converted to ref names by parseRefs.
+// An error is returned only if reading the input fails.
+func parseGitLog(logOutput string) ([]LogEntry, error) {
+	var entries []LogEntry
+	if logOutput == "" {
+		return entries, nil
+	}
+
+	scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
+	for scanner.Scan() {
+		parts := strings.Split(scanner.Text(), "\x00")
+		if len(parts) != 3 {
+			continue // Skip malformed lines
+		}
+
+		entries = append(entries, LogEntry{
+			Hash:    parts[0],
+			Refs:    parseRefs(parts[2]),
+			Subject: parts[1],
+		})
+	}
+	// A scanner failure (such as an over-long line) would otherwise silently
+	// truncate the log.
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("error reading git log output: %w", err)
+	}
+
+	return entries, nil
+}
+
+// parseRefs turns a git decoration string, as produced by the %d format
+// placeholder, into a list of ref names.
+//
+// Input looks like " (HEAD -> main, origin/main, tag: v1.0.0)". Surrounding
+// whitespace and parentheses are removed, the remainder is split on ", ", and
+// the "HEAD -> " and "tag: " prefixes are stripped from individual entries.
+// It returns nil when no ref names are found.
+func parseRefs(decoration string) []string {
+	if decoration == "" {
+		return nil
+	}
+
+	// Peel off the outer whitespace and parentheses.
+	inner := strings.TrimSpace(decoration)
+	inner = strings.TrimPrefix(inner, "(")
+	inner = strings.TrimSuffix(inner, ")")
+	inner = strings.TrimSuffix(inner, ") ")
+	if inner == "" {
+		return nil
+	}
+
+	var refs []string
+	for _, token := range strings.Split(inner, ", ") {
+		name := strings.TrimSpace(token)
+		switch {
+		case name == "":
+			// Nothing between two separators.
+			continue
+		case strings.HasPrefix(name, "HEAD -> "):
+			// "HEAD -> branch": keep only the branch name.
+			name = strings.TrimPrefix(name, "HEAD -> ")
+		case strings.HasPrefix(name, "tag: "):
+			// "tag: name": keep only the tag name.
+			name = strings.TrimPrefix(name, "tag: ")
+		}
+		// A bare "HEAD" and ordinary branch names are kept unchanged.
+		refs = append(refs, name)
+	}
+
+	return refs
+}
diff --git a/git_tools/git_tools_test.go b/git_tools/git_tools_test.go
new file mode 100644
index 0000000..5be6b0f
--- /dev/null
+++ b/git_tools/git_tools_test.go
@@ -0,0 +1,365 @@
+package git_tools
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// setupTestRepo creates an empty git repository in a fresh temporary
+// directory, configures a test user identity in it, and returns its path.
+// The test is aborted if any step fails.
+//
+// The directory comes from t.TempDir, so it is removed automatically when the
+// test finishes, even if setup fails part-way. Callers that still remove the
+// directory themselves remain correct.
+func setupTestRepo(t *testing.T) string {
+	t.Helper() // Report failures at the caller's line
+
+	tempDir := t.TempDir()
+
+	steps := []struct {
+		what string   // Description used in the failure message
+		args []string // git arguments following "-C tempDir"
+	}{
+		{"initialize git repo", []string{"init"}},
+		{"configure git user email", []string{"config", "user.email", "test@example.com"}},
+		{"configure git user name", []string{"config", "user.name", "Test User"}},
+	}
+	for _, step := range steps {
+		cmd := exec.Command("git", append([]string{"-C", tempDir}, step.args...)...)
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("Failed to %s: %v - %s", step.what, err, out)
+		}
+	}
+
+	return tempDir
+}
+
+// createAndCommitFile writes content to filename inside repoDir. When stage
+// is true the file is also added and committed with the message
+// "Add <filename>", and the resulting HEAD commit hash is returned; otherwise
+// the file is only written and an empty string is returned. The test is
+// aborted if any step fails.
+func createAndCommitFile(t *testing.T, repoDir, filename, content string, stage bool) string {
+	t.Helper() // Report failures at the caller's line
+
+	filePath := filepath.Join(repoDir, filename)
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v", err)
+	}
+
+	if !stage {
+		return ""
+	}
+
+	cmd := exec.Command("git", "-C", repoDir, "add", filename)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Fatalf("Failed to add file: %v - %s", err, out)
+	}
+
+	cmd = exec.Command("git", "-C", repoDir, "commit", "-m", "Add "+filename)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Fatalf("Failed to commit file: %v - %s", err, out)
+	}
+
+	// Get the commit hash
+	out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("Failed to get commit hash: %v", err)
+	}
+	// TrimSpace, unlike slicing off the last byte, cannot panic on empty output.
+	return strings.TrimSpace(string(out))
+}
+
+// TestGitRawDiff commits two versions of one file and checks that GitRawDiff
+// reports exactly one modified entry between them, and that an unknown
+// revision produces an error.
+func TestGitRawDiff(t *testing.T) {
+	repo := setupTestRepo(t)
+	defer os.RemoveAll(repo)
+
+	// Two commits touching the same file: creation, then modification.
+	before := createAndCommitFile(t, repo, "test.txt", "initial content\n", true)
+	after := createAndCommitFile(t, repo, "test.txt", "initial content\nmodified content\n", true)
+
+	files, err := GitRawDiff(repo, before, after)
+	if err != nil {
+		t.Fatalf("GitRawDiff failed: %v", err)
+	}
+	if count := len(files); count != 1 {
+		t.Fatalf("Expected 1 file in diff, got %d", count)
+	}
+
+	entry := files[0]
+	if entry.Path != "test.txt" {
+		t.Errorf("Expected path to be test.txt, got %s", entry.Path)
+	}
+	if entry.Status != "M" {
+		t.Errorf("Expected status to be M (modified), got %s", entry.Status)
+	}
+	if entry.OldMode == "" || entry.NewMode == "" {
+		t.Error("Expected file modes to be present")
+	}
+	if entry.OldHash == "" || entry.NewHash == "" {
+		t.Error("Expected file hashes to be present")
+	}
+
+	// An unknown revision must be reported as an error.
+	if _, err := GitRawDiff(repo, "invalid", after); err == nil {
+		t.Error("Expected error for invalid commit hash, got none")
+	}
+}
+
+// TestGitShow checks that GitShow returns non-empty output for an existing
+// commit and an error for an unknown revision.
+func TestGitShow(t *testing.T) {
+	repo := setupTestRepo(t)
+	defer os.RemoveAll(repo)
+
+	// A single commit is enough to have something to show.
+	head := createAndCommitFile(t, repo, "test.txt", "test content\n", true)
+
+	output, err := GitShow(repo, head)
+	switch {
+	case err != nil:
+		t.Fatalf("GitShow failed: %v", err)
+	case output == "":
+		t.Error("Expected non-empty output from GitShow")
+	}
+
+	// An unknown revision must be reported as an error.
+	if _, err := GitShow(repo, "invalid"); err == nil {
+		t.Error("Expected error for invalid commit hash, got none")
+	}
+}
+
+// TestParseGitLog feeds parseGitLog three hand-written log lines and checks
+// the hash, subject and refs of each resulting entry.
+func TestParseGitLog(t *testing.T) {
+	// Test with the format from --pretty="%H%x00%s%x00%d"
+	logOutput := "abc123\x00Initial commit\x00 (HEAD -> main, origin/main)\n" +
+		"def456\x00Add feature X\x00 (tag: v1.0.0)\n" +
+		"ghi789\x00Fix bug Y\x00"
+
+	entries, err := parseGitLog(logOutput)
+	if err != nil {
+		t.Fatalf("parseGitLog returned error: %v", err)
+	}
+
+	if len(entries) != 3 {
+		t.Fatalf("Expected 3 log entries, got %d", len(entries))
+	}
+
+	// Check first entry
+	if entries[0].Hash != "abc123" {
+		t.Errorf("Expected hash abc123, got %s", entries[0].Hash)
+	}
+	// Fatalf, not Errorf: the element checks below index into Refs and would
+	// panic if the length were wrong.
+	if len(entries[0].Refs) != 2 {
+		t.Fatalf("Expected 2 refs, got %d", len(entries[0].Refs))
+	}
+	if entries[0].Refs[0] != "main" || entries[0].Refs[1] != "origin/main" {
+		t.Errorf("Incorrect refs parsed: %v", entries[0].Refs)
+	}
+	if entries[0].Subject != "Initial commit" {
+		t.Errorf("Expected subject 'Initial commit', got '%s'", entries[0].Subject)
+	}
+
+	// Check second entry
+	if entries[1].Hash != "def456" {
+		t.Errorf("Expected hash def456, got %s", entries[1].Hash)
+	}
+	// Fatalf for the same reason as above: Refs[0] is indexed next.
+	if len(entries[1].Refs) != 1 {
+		t.Fatalf("Expected 1 ref, got %d", len(entries[1].Refs))
+	}
+	if entries[1].Refs[0] != "v1.0.0" {
+		t.Errorf("Incorrect tag parsed: %v", entries[1].Refs)
+	}
+	if entries[1].Subject != "Add feature X" {
+		t.Errorf("Expected subject 'Add feature X', got '%s'", entries[1].Subject)
+	}
+
+	// Check third entry
+	if entries[2].Hash != "ghi789" {
+		t.Errorf("Expected hash ghi789, got %s", entries[2].Hash)
+	}
+	if len(entries[2].Refs) != 0 {
+		t.Errorf("Expected 0 refs, got %d", len(entries[2].Refs))
+	}
+	if entries[2].Subject != "Fix bug Y" {
+		t.Errorf("Expected subject 'Fix bug Y', got '%s'", entries[2].Subject)
+	}
+}
+
+func TestParseRefs(t *testing.T) {
+	testCases := []struct {
+		decoration string
+		expected   []string
+	}{
+		{"(HEAD -> main, origin/main)", []string{"main", "origin/main"}},
+		{"(tag: v1.0.0)", []string{"v1.0.0"}},
+		{"(HEAD -> feature/branch, origin/feature/branch, tag: v0.9.0)", []string{"feature/branch", "origin/feature/branch", "v0.9.0"}},
+		{" (tag: v2.0.0) ", []string{"v2.0.0"}},
+		{"", nil},
+		{" ", nil},
+		{"()", nil},
+	}
+
+	for i, tc := range testCases {
+		refs := parseRefs(tc.decoration)
+
+		if len(refs) != len(tc.expected) {
+			t.Errorf("Case %d: Expected %d refs, got %d", i, len(tc.expected), len(refs))
+			continue
+		}
+
+		for j, ref := range refs {
+			if j >= len(tc.expected) || ref != tc.expected[j] {
+				t.Errorf("Case %d: Expected ref '%s', got '%s'", i, tc.expected[j], ref)
+			}
+		}
+	}
+}
+
+// TestGitRecentLog builds a repository with three commits (plus a branch and
+// an annotated tag on the second one) and checks that GitRecentLog, started
+// from the first commit, returns the second and third commits but not the
+// first.
+func TestGitRecentLog(t *testing.T) {
+	// Create a temporary directory for the test repository
+	tmpDir, err := os.MkdirTemp("", "git-test-*")
+	if err != nil {
+		t.Fatalf("Failed to create temp dir: %v", err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// git runs one git command inside the test repository and aborts the test
+	// on failure, so a broken setup step cannot go unnoticed.
+	git := func(args ...string) {
+		t.Helper()
+		cmd := exec.Command("git", append([]string{"-C", tmpDir}, args...)...)
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("git %s failed: %v\n%s", strings.Join(args, " "), err, out)
+		}
+	}
+
+	// commit writes a file, stages it and commits it with the given message.
+	commit := func(filename, content, message string) {
+		t.Helper()
+		if err := os.WriteFile(filepath.Join(tmpDir, filename), []byte(content), 0644); err != nil {
+			t.Fatalf("Failed to write %s: %v", filename, err)
+		}
+		git("add", filename)
+		git("commit", "-m", message)
+	}
+
+	// Initialize a git repository and configure a user for it
+	git("init")
+	git("config", "user.name", "Test User")
+	git("config", "user.email", "test@example.com")
+
+	// Create initial commit
+	commit("initial.txt", "initial content", "Initial commit")
+
+	// Get the initial commit hash
+	initialCommitBytes, err := exec.Command("git", "-C", tmpDir, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("Failed to get initial commit hash: %v", err)
+	}
+	initialCommitHash := strings.TrimSpace(string(initialCommitBytes))
+
+	// Add a second commit
+	commit("second.txt", "second content", "Second commit")
+
+	// Create a branch and tag
+	git("branch", "test-branch")
+	git("tag", "-a", "v1.0.0", "-m", "Version 1.0.0")
+
+	// Add a third commit
+	commit("third.txt", "third content", "Third commit")
+
+	// Test GitRecentLog
+	log, err := GitRecentLog(tmpDir, initialCommitHash)
+	if err != nil {
+		t.Fatalf("GitRecentLog failed: %v", err)
+	}
+
+	// No need to check specific entries in order.
+	// Verify that we have the correct behavior with the fromCommit parameter:
+	// 1. We should find the second and third commits
+	// 2. We should NOT find the initial commit (it should be excluded)
+	foundThird := false
+	foundSecond := false
+	foundInitial := false
+	for _, entry := range log {
+		t.Logf("Found entry: %s - %s", entry.Hash, entry.Subject)
+		switch entry.Subject {
+		case "Third commit":
+			foundThird = true
+		case "Second commit":
+			foundSecond = true
+		case "Initial commit":
+			foundInitial = true
+		}
+	}
+
+	if !foundThird {
+		t.Errorf("Expected to find 'Third commit' in log entries")
+	}
+	if !foundSecond {
+		t.Errorf("Expected to find 'Second commit' in log entries")
+	}
+	if foundInitial {
+		t.Errorf("Should NOT have found 'Initial commit' in log entries (fromCommit parameter should exclude it)")
+	}
+}
+
+func TestParseRefsEdgeCases(t *testing.T) {
+	testCases := []struct {
+		name       string
+		decoration string
+		expected   []string
+	}{
+		{
+			name:       "Multiple tags and branches",
+			decoration: "(HEAD -> main, origin/main, tag: v1.0.0, tag: beta)",
+			expected:   []string{"main", "origin/main", "v1.0.0", "beta"},
+		},
+		{
+			name:       "Leading/trailing whitespace",
+			decoration: "  (HEAD -> main)  ",
+			expected:   []string{"main"},
+		},
+		{
+			name:       "No parentheses",
+			decoration: "HEAD -> main, tag: v1.0.0",
+			expected:   []string{"main", "v1.0.0"},
+		},
+		{
+			name:       "Feature branch with slash",
+			decoration: "(HEAD -> feature/new-ui)",
+			expected:   []string{"feature/new-ui"},
+		},
+		{
+			name:       "Only HEAD with no branch",
+			decoration: "(HEAD)",
+			expected:   []string{"HEAD"},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			refs := parseRefs(tc.decoration)
+
+			if len(refs) != len(tc.expected) {
+				t.Errorf("%s: Expected %d refs, got %d", tc.name, len(tc.expected), len(refs))
+				return
+			}
+
+			for i, ref := range refs {
+				if ref != tc.expected[i] {
+					t.Errorf("%s: Expected ref[%d] = '%s', got '%s'", tc.name, i, tc.expected[i], ref)
+				}
+			}
+		})
+	}
+}