// Package git_tools provides utilities for interacting with Git repositories.
package git_tools

import (
	"bufio"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

// DiffFile represents a single file entry in a Git diff.
//
// The fields mirror the columns of one `git diff --raw` line:
// ":oldmode newmode oldhash newhash status<TAB>path".
type DiffFile struct {
	Path    string `json:"path"`     // path column of the raw diff line
	OldMode string `json:"old_mode"` // mode column before the change
	NewMode string `json:"new_mode"` // mode column after the change
	OldHash string `json:"old_hash"` // hash column before the change
	NewHash string `json:"new_hash"` // hash column after the change
	Status  string `json:"status"`   // A=added, M=modified, D=deleted, etc.
}

// GitRawDiff returns a structured representation of the Git diff between two commits or references.
Philip Zeyliger272a90e2025-05-16 14:49:51 -070022// If 'to' is empty, it will show unstaged changes (diff with working directory)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000023func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
24 // Git command to generate the diff in raw format with full hashes
Philip Zeyliger272a90e2025-05-16 14:49:51 -070025 var cmd *exec.Cmd
26 if to == "" {
27 // If 'to' is empty, show unstaged changes
28 cmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", from)
29 } else {
30 // Normal diff between two refs
31 cmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", from, to)
32 }
33
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000034 out, err := cmd.CombinedOutput()
35 if err != nil {
36 return nil, fmt.Errorf("error executing git diff: %w - %s", err, string(out))
37 }
38
39 // Parse the raw diff output into structured format
40 return parseRawDiff(string(out))
41}
42
// GitShow returns the output of `git show` for the given commit hash, run
// inside repoDir.
//
// Only git's standard output is returned, so warnings written to stderr
// cannot end up inside the shown content. When the command fails, the
// returned error wraps the exec error and carries git's stderr text.
func GitShow(repoDir, hash string) (string, error) {
	out, err := exec.Command("git", "-C", repoDir, "show", hash).Output()
	if err != nil {
		// Output stores the captured stderr on *exec.ExitError and returns
		// that error unwrapped, so a direct type assertion is sufficient.
		var stderr []byte
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = exitErr.Stderr
		}
		return "", fmt.Errorf("error executing git show: %w - %s", err, string(stderr))
	}
	return string(out), nil
}

53// parseRawDiff converts git diff --raw output into structured format
54func parseRawDiff(diffOutput string) ([]DiffFile, error) {
55 var files []DiffFile
56 if diffOutput == "" {
57 return files, nil
58 }
59
60 // Process diff output line by line
61 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(diffOutput)))
62 for scanner.Scan() {
63 line := scanner.Text()
64 // Format: :oldmode newmode oldhash newhash status\tpath
65 // Example: :000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A git_tools/git_tools.go
66 if !strings.HasPrefix(line, ":") {
67 continue
68 }
69
70 parts := strings.Fields(line[1:]) // Skip the leading colon
71 if len(parts) < 5 {
72 continue // Not enough parts, skip this line
73 }
74
75 oldMode := parts[0]
76 newMode := parts[1]
77 oldHash := parts[2]
78 newHash := parts[3]
79 status := parts[4]
80
81 // The path is everything after the status character and tab
82 pathIndex := strings.Index(line, status) + len(status) + 1 // +1 for the tab
83 path := ""
84 if pathIndex < len(line) {
85 path = strings.TrimSpace(line[pathIndex:])
86 }
87
88 files = append(files, DiffFile{
89 Path: path,
90 OldMode: oldMode,
91 NewMode: newMode,
92 OldHash: oldHash,
93 NewHash: newHash,
94 Status: status,
95 })
96 }
97
98 return files, nil
99}
100
// GitLogEntry represents a single entry in the git log.
type GitLogEntry struct {
	Hash    string   `json:"hash"`    // The full commit hash
	Refs    []string `json:"refs"`    // References (branches, tags) pointing to this commit
	Subject string   `json:"subject"` // The commit subject/message
}

108// GitRecentLog returns the recent commit log between the initial commit and HEAD
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700109func GitRecentLog(repoDir string, initialCommitHash string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000110 // Validate input
111 if initialCommitHash == "" {
112 return nil, fmt.Errorf("initial commit hash must be provided")
113 }
114
115 // Find merge-base of HEAD and initial commit
116 cmdMergeBase := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash)
117 mergeBase, err := cmdMergeBase.CombinedOutput()
118 if err != nil {
119 // If merge-base fails (which can happen in simple repos), use initialCommitHash
120 return getGitLog(repoDir, initialCommitHash)
121 }
122
123 mergeBaseHash := strings.TrimSpace(string(mergeBase))
124 if mergeBaseHash == "" {
125 // If merge-base doesn't return a valid hash, use initialCommitHash
126 return getGitLog(repoDir, initialCommitHash)
127 }
128
129 // Use the merge-base as the 'from' point
130 return getGitLog(repoDir, mergeBaseHash)
131}
132
133// getGitLog gets the git log with the specified format using the provided fromCommit
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700134func getGitLog(repoDir string, fromCommit string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000135 // Check if fromCommit~10 exists (10 commits before fromCommit)
136 checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+"~10")
137 if err := checkCmd.Run(); err != nil {
138 // If fromCommit~10 doesn't exist, use just fromCommit..HEAD as the range
139 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"..HEAD")
140 out, err := cmd.CombinedOutput()
141 if err != nil {
142 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
143 }
144 return parseGitLog(string(out))
145 }
146
147 // Use fromCommit~10..HEAD range with the specified format for easy parsing
148 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"~10..HEAD")
149 out, err := cmd.CombinedOutput()
150 if err != nil {
151 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
152 }
153
154 return parseGitLog(string(out))
155}
156
157// parseGitLog parses the output of git log with null-separated fields
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700158func parseGitLog(logOutput string) ([]GitLogEntry, error) {
159 var entries []GitLogEntry
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000160 if logOutput == "" {
161 return entries, nil
162 }
163
164 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
165 for scanner.Scan() {
166 line := scanner.Text()
167 parts := strings.Split(line, "\x00")
168 if len(parts) != 3 {
169 continue // Skip malformed lines
170 }
171
172 hash := parts[0]
173 subject := parts[1]
174 decoration := parts[2]
175
176 // Parse the refs from the decoration
177 refs := parseRefs(decoration)
178
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700179 entries = append(entries, GitLogEntry{
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000180 Hash: hash,
181 Refs: refs,
182 Subject: subject,
183 })
184 }
185
186 return entries, nil
187}
188
// parseRefs extracts reference names from a git decoration string.
//
// The decoration produced by %d looks like
// " (HEAD -> main, origin/main, tag: v1.0.0)". Surrounding whitespace and the
// enclosing parentheses are removed and the remainder is split on ", ".
// A "HEAD -> " or "tag: " prefix is stripped from an item; any other item,
// including a bare "HEAD", is returned unchanged. Nil is returned when the
// decoration holds no refs.
func parseRefs(decoration string) []string {
	// Remove surrounding whitespace and parentheses.
	decoration = strings.TrimSpace(decoration)
	decoration = strings.TrimPrefix(decoration, "(")
	decoration = strings.TrimSuffix(decoration, ")")
	if decoration == "" {
		return nil
	}

	var refs []string
	for _, part := range strings.Split(decoration, ", ") {
		part = strings.TrimSpace(part)
		switch {
		case part == "":
			// Nothing between two separators.
		case strings.HasPrefix(part, "HEAD -> "):
			// "HEAD -> branch": keep only the branch name.
			refs = append(refs, strings.TrimPrefix(part, "HEAD -> "))
		case strings.HasPrefix(part, "tag: "):
			// "tag: name": keep only the tag name.
			refs = append(refs, strings.TrimPrefix(part, "tag: "))
		default:
			// Regular branch name, or a bare HEAD.
			refs = append(refs, part)
		}
	}

	return refs
}

// validateRepoPath verifies that filePath names a file tracked by git and
// located within the repository directory repoDir.
//
// It returns filepath.Join(repoDir, filePath) when all checks pass:
//   - `git ls-files --error-unmatch` must accept filePath, which is matched
//     literally (no glob or pathspec magic) and can never be read as an option;
//   - the lexical absolute path must lie below the absolute repoDir, which
//     rejects traversal such as ../../../etc/passwd;
//   - if the path can be resolved on disk, its symlink-free form must also lie
//     below the resolved repoDir, so a tracked symlink cannot point reads or
//     writes outside the repository.
//
// A path that cannot be resolved (for example a tracked file that is missing
// from the working tree) is judged on the lexical check alone.
func validateRepoPath(repoDir, filePath string) (string, error) {
	// First verify that the requested file is tracked by git. "--" ends
	// option parsing and --literal-pathspecs disables wildcard matching, so
	// filePath is treated purely as a file name.
	cmd := exec.Command("git", "--literal-pathspecs", "-C", repoDir, "ls-files", "--error-unmatch", "--", filePath)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("file not tracked by git or outside repository: %s", filePath)
	}

	// Construct the full file path.
	fullPath := filepath.Join(repoDir, filePath)

	absRepoDir, err := filepath.Abs(repoDir)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
	}

	absFilePath, err := filepath.Abs(fullPath)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute file path: %w", err)
	}

	// The trailing separator keeps "/repo-other" from matching "/repo".
	separator := string(filepath.Separator)
	if !strings.HasPrefix(absFilePath, absRepoDir+separator) {
		return "", fmt.Errorf("file path outside repository: %s", filePath)
	}

	// Repeat the containment check on the symlink-free paths when the file
	// exists, since filepath.Abs does not follow symlinks.
	if resolvedFile, err := filepath.EvalSymlinks(absFilePath); err == nil {
		resolvedRepo, err := filepath.EvalSymlinks(absRepoDir)
		if err != nil {
			return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
		}
		if !strings.HasPrefix(resolvedFile, resolvedRepo+separator) {
			return "", fmt.Errorf("file path outside repository: %s", filePath)
		}
	}

	return fullPath, nil
}

276// GitCat returns the contents of a file in the repository at the given path
277// This is used to get the current working copy of a file (not using git show)
278func GitCat(repoDir, filePath string) (string, error) {
279 fullPath, err := validateRepoPath(repoDir, filePath)
280 if err != nil {
281 return "", err
282 }
283
284 // Read the file
285 content, err := os.ReadFile(fullPath)
286 if err != nil {
287 return "", fmt.Errorf("error reading file %s: %w", filePath, err)
288 }
289
290 return string(content), nil
291}
292
293// GitSaveFile saves content to a file in the repository, checking first that it's tracked by git
294// This prevents writing to files outside the repository
295func GitSaveFile(repoDir, filePath, content string) error {
296 fullPath, err := validateRepoPath(repoDir, filePath)
297 if err != nil {
298 return err
299 }
300
301 // Write the content to the file
302 err = os.WriteFile(fullPath, []byte(content), 0644)
303 if err != nil {
304 return fmt.Errorf("error writing to file %s: %w", filePath, err)
305 }
306
307 return nil
308}