git_tools: Implement git diff and show API
Added git_tools package providing structured access to git diff and show commands, plus a helper (GitRecentLog) that lists recent commits via git log. Exposed the diff and show methods via HTTP endpoints in loophttp.
This is a stepping stone to a better diff view.
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: se75f0a1b2c3d4e5k
diff --git a/git_tools/git_tools.go b/git_tools/git_tools.go
new file mode 100644
index 0000000..f8a1807
--- /dev/null
+++ b/git_tools/git_tools.go
@@ -0,0 +1,230 @@
+// Package git_tools provides utilities for interacting with Git repositories.
+package git_tools
+
+import (
+ "bufio"
+ "fmt"
+ "os/exec"
+ "strings"
+)
+
+// DiffFile describes a single entry of `git diff --raw` output.
+type DiffFile struct {
+	Path    string `json:"path"`     // path text printed after the status field
+	OldMode string `json:"old_mode"` // first mode column (oldmode)
+	NewMode string `json:"new_mode"` // second mode column (newmode)
+	OldHash string `json:"old_hash"` // first hash column (oldhash)
+	NewHash string `json:"new_hash"` // second hash column (newhash)
+	Status  string `json:"status"`   // A=added, M=modified, D=deleted, etc.
+}
+
+// GitRawDiff returns a structured representation of the Git diff between two
+// commits or references.
+//
+// It runs `git diff --raw --abbrev=40 from to` inside repoDir and parses the
+// output into one DiffFile per entry line. Revisions that begin with "-" are
+// rejected before git is invoked, because git would otherwise interpret them
+// as command-line options instead of revisions.
+//
+// On failure the returned error wraps the exec error and includes git's
+// combined stdout/stderr.
+func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
+	for _, rev := range []string{from, to} {
+		if strings.HasPrefix(rev, "-") {
+			return nil, fmt.Errorf("invalid revision %q: must not start with '-'", rev)
+		}
+	}
+
+	// Raw format with full-length hashes is what parseRawDiff expects.
+	cmd := exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", from, to)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("error executing git diff: %w - %s", err, string(out))
+	}
+
+	return parseRawDiff(string(out))
+}
+
+// GitShow returns the output of `git show <hash>` run inside repoDir.
+//
+// hash is passed to git as a single argument. A value that begins with "-" is
+// rejected before git is invoked, since git would parse it as an option
+// rather than as a revision.
+//
+// On failure the returned error wraps the exec error and includes whatever git
+// printed. stdout and stderr are captured together, so on success the returned
+// string also contains anything git wrote to stderr.
+func GitShow(repoDir, hash string) (string, error) {
+	if strings.HasPrefix(hash, "-") {
+		return "", fmt.Errorf("invalid revision %q: must not start with '-'", hash)
+	}
+
+	out, err := exec.Command("git", "-C", repoDir, "show", hash).CombinedOutput()
+	if err != nil {
+		return "", fmt.Errorf("error executing git show: %w - %s", err, string(out))
+	}
+	return string(out), nil
+}
+
+// parseRawDiff converts `git diff --raw` output into a slice of DiffFile.
+//
+// Each entry line has the form
+//
+//	:oldmode newmode oldhash newhash status<TAB>path
+//
+// Lines that do not start with ":" or that carry fewer than five metadata
+// fields are skipped. The path is everything after the first tab, taken
+// verbatim, so file names with leading or trailing spaces are preserved.
+// If a line contains no tab at all, the fields following the status are
+// joined with single spaces and used as the path instead.
+//
+// NOTE(review): for rename/copy entries git prints two tab-separated paths;
+// both end up in Path unchanged. Confirm whether callers need them split.
+//
+// It returns a nil slice when the input holds no entries, and a non-nil error
+// only when scanning the input fails (for example when a single line exceeds
+// bufio.Scanner's default token size).
+func parseRawDiff(diffOutput string) ([]DiffFile, error) {
+	var files []DiffFile
+
+	scanner := bufio.NewScanner(strings.NewReader(diffOutput))
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, ":") {
+			continue
+		}
+
+		// Split on the tab delimiter rather than searching for the status
+		// text, so the path never depends on what the metadata contains.
+		meta, path := line[1:], ""
+		tab := strings.IndexByte(line, '\t')
+		if tab >= 0 {
+			meta, path = line[1:tab], line[tab+1:]
+		}
+
+		parts := strings.Fields(meta)
+		if len(parts) < 5 {
+			continue // Not enough parts, skip this line
+		}
+		if tab < 0 {
+			path = strings.Join(parts[5:], " ")
+		}
+
+		files = append(files, DiffFile{
+			Path:    path,
+			OldMode: parts[0],
+			NewMode: parts[1],
+			OldHash: parts[2],
+			NewHash: parts[3],
+			Status:  parts[4],
+		})
+	}
+	// Surface scanner failures instead of returning a silently truncated list.
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("error parsing git diff output: %w", err)
+	}
+
+	return files, nil
+}
+
+// LogEntry represents a single entry in the git log.
+// parseGitLog builds one LogEntry from each well-formed line of log output.
+type LogEntry struct {
+	Hash    string   `json:"hash"`    // The full commit hash
+	Refs    []string `json:"refs"`    // References (branches, tags) pointing to this commit; nil when the commit has no decoration
+	Subject string   `json:"subject"` // The commit subject/message
+}
+
+// GitRecentLog returns recent log entries leading up to HEAD for the
+// repository at repoDir.
+//
+// The starting point handed to getGitLog is the merge-base of HEAD and
+// initialCommitHash. If `git merge-base` fails or prints nothing,
+// initialCommitHash itself is used. getGitLog decides the exact range that is
+// listed from that starting point.
+//
+// An error is returned when initialCommitHash is empty or begins with "-"
+// (git would parse the latter as an option), or when the underlying git log
+// invocation fails.
+func GitRecentLog(repoDir string, initialCommitHash string) ([]LogEntry, error) {
+	if initialCommitHash == "" {
+		return nil, fmt.Errorf("initial commit hash must be provided")
+	}
+	if strings.HasPrefix(initialCommitHash, "-") {
+		return nil, fmt.Errorf("invalid initial commit hash %q: must not start with '-'", initialCommitHash)
+	}
+
+	fromCommit := initialCommitHash
+
+	// Read stdout only: anything git writes to stderr must not end up inside
+	// the hash that is later spliced into a revision range.
+	mergeBase, err := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash).Output()
+	if err == nil {
+		if hash := strings.TrimSpace(string(mergeBase)); hash != "" {
+			fromCommit = hash
+		}
+	}
+
+	return getGitLog(repoDir, fromCommit)
+}
+
+// getGitLog runs `git log` in repoDir over a range ending at HEAD and returns
+// the parsed entries (at most 1000, per the -n flag).
+//
+// The range starts ten commits before fromCommit (fromCommit~10) when that
+// revision resolves; otherwise it starts at fromCommit itself. Each commit is
+// printed as hash, subject and decoration separated by NUL bytes, which is the
+// layout parseGitLog consumes.
+func getGitLog(repoDir string, fromCommit string) ([]LogEntry, error) {
+	// Probe for fromCommit~10; fall back to fromCommit if it does not exist.
+	base := fromCommit + "~10"
+	if exec.Command("git", "-C", repoDir, "rev-parse", "--verify", base).Run() != nil {
+		base = fromCommit
+	}
+
+	logCmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", base+"..HEAD")
+	raw, err := logCmd.CombinedOutput()
+	if err != nil {
+		return nil, fmt.Errorf("error executing git log: %w - %s", err, string(raw))
+	}
+
+	return parseGitLog(string(raw))
+}
+
+// parseGitLog parses `git log` output whose lines consist of three
+// NUL-separated fields: commit hash, subject, and ref decoration.
+//
+// Surrounding whitespace of the whole input is trimmed first. Lines that do
+// not split into exactly three fields are skipped. The decoration field is
+// turned into ref names by parseRefs.
+//
+// It returns a nil slice for input without valid lines, and a non-nil error
+// only if scanning the input fails.
+func parseGitLog(logOutput string) ([]LogEntry, error) {
+	var entries []LogEntry
+
+	scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
+	// Allow lines up to 1 MiB so an unusually long subject does not hit the
+	// scanner's default 64 KiB token limit.
+	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1<<20)
+
+	for scanner.Scan() {
+		parts := strings.Split(scanner.Text(), "\x00")
+		if len(parts) != 3 {
+			continue // Skip malformed lines
+		}
+
+		entries = append(entries, LogEntry{
+			Hash:    parts[0],
+			Refs:    parseRefs(parts[2]),
+			Subject: parts[1],
+		})
+	}
+	// Report scanner failures rather than returning a silently truncated log.
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("error parsing git log output: %w", err)
+	}
+
+	return entries, nil
+}
+
+// parseRefs extracts reference names from a git decoration string such as
+//
+//	(HEAD -> main, origin/main, tag: v1.0.0)
+//
+// Surrounding whitespace and one pair of enclosing parentheses are removed,
+// the remainder is split on ", ", and for each element a leading "HEAD -> "
+// or "tag: " marker is dropped. A bare "HEAD" is kept as-is. Empty elements
+// are ignored. The result is nil when no reference names are found.
+func parseRefs(decoration string) []string {
+	decoration = strings.TrimSpace(decoration)
+	decoration = strings.TrimPrefix(decoration, "(")
+	decoration = strings.TrimSuffix(decoration, ")")
+
+	var refs []string
+	for _, part := range strings.Split(decoration, ", ") {
+		part = strings.TrimSpace(part)
+		switch {
+		case part == "":
+			continue
+		case strings.HasPrefix(part, "HEAD -> "):
+			part = strings.TrimPrefix(part, "HEAD -> ")
+		case strings.HasPrefix(part, "tag: "):
+			part = strings.TrimPrefix(part, "tag: ")
+		}
+		refs = append(refs, part)
+	}
+
+	return refs
+}
diff --git a/git_tools/git_tools_test.go b/git_tools/git_tools_test.go
new file mode 100644
index 0000000..5be6b0f
--- /dev/null
+++ b/git_tools/git_tools_test.go
@@ -0,0 +1,365 @@
+package git_tools
+
+import (
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// setupTestRepo creates an empty git repository in a fresh temporary
+// directory, sets a test user name and email on it, and returns its path.
+//
+// The directory comes from t.TempDir, so the testing framework removes it when
+// the test ends, including when one of the setup steps below fails. Callers
+// that also remove the directory themselves remain correct.
+func setupTestRepo(t *testing.T) string {
+	t.Helper()
+
+	repoDir := t.TempDir()
+
+	steps := []struct {
+		what string
+		args []string
+	}{
+		{"initialize git repo", []string{"init"}},
+		{"configure git user email", []string{"config", "user.email", "test@example.com"}},
+		{"configure git user name", []string{"config", "user.name", "Test User"}},
+	}
+	for _, step := range steps {
+		cmd := exec.Command("git", append([]string{"-C", repoDir}, step.args...)...)
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("Failed to %s: %v - %s", step.what, err, out)
+		}
+	}
+
+	return repoDir
+}
+
+// createAndCommitFile writes content to filename inside repoDir.
+//
+// When stage is true the file is added and committed with the message
+// "Add <filename>", and the hash of the resulting HEAD commit is returned.
+// When stage is false the file is only written and "" is returned.
+// Any failure aborts the test via t.Fatalf.
+func createAndCommitFile(t *testing.T, repoDir, filename, content string, stage bool) string {
+	t.Helper()
+
+	filePath := filepath.Join(repoDir, filename)
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v", err)
+	}
+	if !stage {
+		return ""
+	}
+
+	if out, err := exec.Command("git", "-C", repoDir, "add", filename).CombinedOutput(); err != nil {
+		t.Fatalf("Failed to add file: %v - %s", err, out)
+	}
+	if out, err := exec.Command("git", "-C", repoDir, "commit", "-m", "Add "+filename).CombinedOutput(); err != nil {
+		t.Fatalf("Failed to commit file: %v - %s", err, out)
+	}
+
+	out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("Failed to get commit hash: %v", err)
+	}
+	// TrimSpace instead of slicing off the last byte: it cannot panic on empty
+	// output and also removes a trailing "\r\n".
+	return strings.TrimSpace(string(out))
+}
+
+// TestGitRawDiff commits two versions of the same file and checks that
+// GitRawDiff reports exactly one modified entry between them, and that an
+// unknown revision produces an error.
+func TestGitRawDiff(t *testing.T) {
+	repo := setupTestRepo(t)
+	defer os.RemoveAll(repo)
+
+	// Two commits touching the same file: the diff between them is one "M".
+	base := createAndCommitFile(t, repo, "test.txt", "initial content\n", true)
+	head := createAndCommitFile(t, repo, "test.txt", "initial content\nmodified content\n", true)
+
+	files, err := GitRawDiff(repo, base, head)
+	if err != nil {
+		t.Fatalf("GitRawDiff failed: %v", err)
+	}
+	if n := len(files); n != 1 {
+		t.Fatalf("Expected 1 file in diff, got %d", n)
+	}
+
+	entry := files[0]
+	if entry.Path != "test.txt" {
+		t.Errorf("Expected path to be test.txt, got %s", entry.Path)
+	}
+	if entry.Status != "M" {
+		t.Errorf("Expected status to be M (modified), got %s", entry.Status)
+	}
+	if entry.OldMode == "" || entry.NewMode == "" {
+		t.Error("Expected file modes to be present")
+	}
+	if entry.OldHash == "" || entry.NewHash == "" {
+		t.Error("Expected file hashes to be present")
+	}
+
+	// A revision git cannot resolve must surface as an error.
+	if _, err := GitRawDiff(repo, "invalid", head); err == nil {
+		t.Error("Expected error for invalid commit hash, got none")
+	}
+}
+
+// TestGitShow checks that GitShow yields non-empty output for an existing
+// commit and an error for a revision that does not exist.
+func TestGitShow(t *testing.T) {
+	repo := setupTestRepo(t)
+	defer os.RemoveAll(repo)
+
+	hash := createAndCommitFile(t, repo, "test.txt", "test content\n", true)
+
+	output, err := GitShow(repo, hash)
+	switch {
+	case err != nil:
+		t.Fatalf("GitShow failed: %v", err)
+	case output == "":
+		t.Error("Expected non-empty output from GitShow")
+	}
+
+	// A revision git cannot resolve must surface as an error.
+	if _, err := GitShow(repo, "invalid"); err == nil {
+		t.Error("Expected error for invalid commit hash, got none")
+	}
+}
+
+// TestParseGitLog feeds parseGitLog three NUL-separated log lines (two with a
+// decoration, one without) and checks hash, subject and refs of each entry.
+func TestParseGitLog(t *testing.T) {
+	// Test with the format from --pretty="%H%x00%s%x00%d"
+	logOutput := "abc123\x00Initial commit\x00 (HEAD -> main, origin/main)\n" +
+		"def456\x00Add feature X\x00 (tag: v1.0.0)\n" +
+		"ghi789\x00Fix bug Y\x00"
+
+	entries, err := parseGitLog(logOutput)
+	if err != nil {
+		t.Fatalf("parseGitLog returned error: %v", err)
+	}
+
+	want := []struct {
+		hash    string
+		subject string
+		refs    []string
+	}{
+		{"abc123", "Initial commit", []string{"main", "origin/main"}},
+		{"def456", "Add feature X", []string{"v1.0.0"}},
+		{"ghi789", "Fix bug Y", nil},
+	}
+	if len(entries) != len(want) {
+		t.Fatalf("Expected %d log entries, got %d", len(want), len(entries))
+	}
+
+	for i, w := range want {
+		got := entries[i]
+		if got.Hash != w.hash {
+			t.Errorf("Entry %d: expected hash %s, got %s", i, w.hash, got.Hash)
+		}
+		if got.Subject != w.subject {
+			t.Errorf("Entry %d: expected subject '%s', got '%s'", i, w.subject, got.Subject)
+		}
+		// Compare lengths and joined contents instead of indexing Refs
+		// directly, so a wrong ref count is a failure and not a panic.
+		if len(got.Refs) != len(w.refs) || strings.Join(got.Refs, "\n") != strings.Join(w.refs, "\n") {
+			t.Errorf("Entry %d: expected refs %v, got %v", i, w.refs, got.Refs)
+		}
+	}
+}
+
+// TestParseRefs runs parseRefs over a table of decoration strings, including
+// empty and whitespace-only input, and compares the refs element by element.
+func TestParseRefs(t *testing.T) {
+	type refCase struct {
+		decoration string
+		expected   []string
+	}
+	cases := []refCase{
+		{"(HEAD -> main, origin/main)", []string{"main", "origin/main"}},
+		{"(tag: v1.0.0)", []string{"v1.0.0"}},
+		{"(HEAD -> feature/branch, origin/feature/branch, tag: v0.9.0)", []string{"feature/branch", "origin/feature/branch", "v0.9.0"}},
+		{" (tag: v2.0.0) ", []string{"v2.0.0"}},
+		{"", nil},
+		{" ", nil},
+		{"()", nil},
+	}
+
+	for idx := range cases {
+		tc := cases[idx]
+		got := parseRefs(tc.decoration)
+
+		// A length mismatch is reported once; element checks only make sense
+		// when both slices have the same size.
+		if len(got) != len(tc.expected) {
+			t.Errorf("Case %d: Expected %d refs, got %d", idx, len(tc.expected), len(got))
+			continue
+		}
+
+		for j := 0; j < len(got); j++ {
+			if got[j] != tc.expected[j] {
+				t.Errorf("Case %d: Expected ref '%s', got '%s'", idx, tc.expected[j], got[j])
+			}
+		}
+	}
+}
+
+// TestGitRecentLog builds a repository with three commits (plus a branch and
+// an annotated tag on the second one) and checks that GitRecentLog, given the
+// first commit's hash, lists the second and third commits but not the first.
+func TestGitRecentLog(t *testing.T) {
+	// Removed automatically by the testing framework when the test ends.
+	tmpDir := t.TempDir()
+
+	// git runs one git command inside tmpDir and aborts the test if it fails,
+	// so a broken setup step is reported where it happens instead of showing
+	// up later as a confusing assertion failure.
+	git := func(args ...string) {
+		t.Helper()
+		cmd := exec.Command("git", append([]string{"-C", tmpDir}, args...)...)
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("git %s failed: %v\n%s", strings.Join(args, " "), err, out)
+		}
+	}
+
+	// commitFile writes a file into the repository and commits it with the
+	// given message.
+	commitFile := func(name, content, message string) {
+		t.Helper()
+		if err := os.WriteFile(filepath.Join(tmpDir, name), []byte(content), 0644); err != nil {
+			t.Fatalf("Failed to write %s: %v", name, err)
+		}
+		git("add", name)
+		git("commit", "-m", message)
+	}
+
+	// Initialize the repository and configure the git user for it
+	git("init")
+	git("config", "user.name", "Test User")
+	git("config", "user.email", "test@example.com")
+
+	// Create initial commit and remember its hash
+	commitFile("initial.txt", "initial content", "Initial commit")
+	initialCommitBytes, err := exec.Command("git", "-C", tmpDir, "rev-parse", "HEAD").Output()
+	if err != nil {
+		t.Fatalf("Failed to get initial commit hash: %v", err)
+	}
+	initialCommitHash := strings.TrimSpace(string(initialCommitBytes))
+
+	// Add a second commit, then a branch and a tag pointing at it
+	commitFile("second.txt", "second content", "Second commit")
+	git("branch", "test-branch")
+	git("tag", "-a", "v1.0.0", "-m", "Version 1.0.0")
+
+	// Add a third commit
+	commitFile("third.txt", "third content", "Third commit")
+
+	// Test GitRecentLog
+	log, err := GitRecentLog(tmpDir, initialCommitHash)
+	if err != nil {
+		t.Fatalf("GitRecentLog failed: %v", err)
+	}
+
+	// Order is not checked. With the initial commit as the starting point:
+	// 1. the second and third commits must be listed;
+	// 2. the initial commit itself must be excluded.
+	foundThird := false
+	foundSecond := false
+	foundInitial := false
+	for _, entry := range log {
+		t.Logf("Found entry: %s - %s", entry.Hash, entry.Subject)
+		switch entry.Subject {
+		case "Third commit":
+			foundThird = true
+		case "Second commit":
+			foundSecond = true
+		case "Initial commit":
+			foundInitial = true
+		}
+	}
+
+	if !foundThird {
+		t.Errorf("Expected to find 'Third commit' in log entries")
+	}
+	if !foundSecond {
+		t.Errorf("Expected to find 'Second commit' in log entries")
+	}
+	if foundInitial {
+		t.Errorf("Should NOT have found 'Initial commit' in log entries (fromCommit parameter should exclude it)")
+	}
+}
+
+// TestParseRefsEdgeCases runs parseRefs against named edge cases (several
+// tags, padded input, missing parentheses, slashes in branch names, bare
+// HEAD), one subtest per case.
+func TestParseRefsEdgeCases(t *testing.T) {
+	type edgeCase struct {
+		name       string
+		decoration string
+		expected   []string
+	}
+	cases := []edgeCase{
+		{"Multiple tags and branches", "(HEAD -> main, origin/main, tag: v1.0.0, tag: beta)", []string{"main", "origin/main", "v1.0.0", "beta"}},
+		{"Leading/trailing whitespace", " (HEAD -> main) ", []string{"main"}},
+		{"No parentheses", "HEAD -> main, tag: v1.0.0", []string{"main", "v1.0.0"}},
+		{"Feature branch with slash", "(HEAD -> feature/new-ui)", []string{"feature/new-ui"}},
+		{"Only HEAD with no branch", "(HEAD)", []string{"HEAD"}},
+	}
+
+	for idx := range cases {
+		tc := cases[idx]
+		t.Run(tc.name, func(t *testing.T) {
+			got := parseRefs(tc.decoration)
+
+			// Element comparison is only meaningful for equal lengths.
+			if len(got) != len(tc.expected) {
+				t.Errorf("%s: Expected %d refs, got %d", tc.name, len(tc.expected), len(got))
+				return
+			}
+
+			for i := 0; i < len(got); i++ {
+				if got[i] != tc.expected[i] {
+					t.Errorf("%s: Expected ref[%d] = '%s', got '%s'", tc.name, i, tc.expected[i], got[i])
+				}
+			}
+		})
+	}
+}