blob: fa53e8de1cc0b6f17d826cddb3162569972b9aa4 [file] [log] [blame]
// Package git_tools provides utilities for interacting with Git repositories.
2package git_tools
3
4import (
5 "bufio"
Philip Zeyliger75bd37d2025-05-22 18:49:14 +00006 "context"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00007 "fmt"
Philip Zeyliger272a90e2025-05-16 14:49:51 -07008 "os"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00009 "os/exec"
Philip Zeyliger272a90e2025-05-16 14:49:51 -070010 "path/filepath"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000011 "strings"
12)
13
// DiffFile describes a single changed file reported by a Git diff.
type DiffFile struct {
	// Path is the file path as printed in the raw diff line.
	Path string `json:"path"`
	// OldMode and NewMode are the mode fields of the raw diff line.
	OldMode string `json:"old_mode"`
	NewMode string `json:"new_mode"`
	// OldHash and NewHash are the hash fields of the raw diff line.
	OldHash string `json:"old_hash"`
	NewHash string `json:"new_hash"`
	// Status is the change type: A=added, M=modified, D=deleted, etc.
	Status string `json:"status"`
	// Additions is the number of lines added.
	Additions int `json:"additions"`
	// Deletions is the number of lines deleted.
	Deletions int `json:"deletions"`
}

// GitRawDiff returns a structured representation of the Git diff between two commits or references
Philip Zeyliger272a90e2025-05-16 14:49:51 -070025// If 'to' is empty, it will show unstaged changes (diff with working directory)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000026func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
Philip Zeyligere89b3082025-05-29 03:16:06 +000027 // Git command to generate the diff in raw format with full hashes and numstat
28 var rawCmd, numstatCmd *exec.Cmd
Philip Zeyliger272a90e2025-05-16 14:49:51 -070029 if to == "" {
30 // If 'to' is empty, show unstaged changes
Philip Zeyligere89b3082025-05-29 03:16:06 +000031 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", from)
32 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070033 } else {
34 // Normal diff between two refs
Philip Zeyligere89b3082025-05-29 03:16:06 +000035 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", from, to)
36 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from, to)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070037 }
38
Philip Zeyligere89b3082025-05-29 03:16:06 +000039 // Execute raw diff command
40 rawOut, err := rawCmd.CombinedOutput()
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000041 if err != nil {
Philip Zeyligere89b3082025-05-29 03:16:06 +000042 return nil, fmt.Errorf("error executing git diff --raw: %w - %s", err, string(rawOut))
43 }
44
45 // Execute numstat command
46 numstatOut, err := numstatCmd.CombinedOutput()
47 if err != nil {
48 return nil, fmt.Errorf("error executing git diff --numstat: %w - %s", err, string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000049 }
50
51 // Parse the raw diff output into structured format
Philip Zeyligere89b3082025-05-29 03:16:06 +000052 return parseRawDiffWithNumstat(string(rawOut), string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000053}
54
// GitShow runs git show for the given commit hash inside repoDir and returns
// the command's combined stdout/stderr output. If the command fails, the
// returned error wraps the underlying error and includes that output.
func GitShow(repoDir, hash string) (string, error) {
	out, err := exec.Command("git", "-C", repoDir, "show", hash).CombinedOutput()
	if err == nil {
		return string(out), nil
	}
	return "", fmt.Errorf("error executing git show: %w - %s", err, string(out))
}
64
Philip Zeyligere89b3082025-05-29 03:16:06 +000065// parseRawDiffWithNumstat converts git diff --raw and --numstat output into structured format
66func parseRawDiffWithNumstat(rawOutput, numstatOutput string) ([]DiffFile, error) {
67 // First parse the raw diff to get the base file information
68 files, err := parseRawDiff(rawOutput)
69 if err != nil {
70 return nil, err
71 }
72
73 // Create a map to store numstat data by file path
74 numstatMap := make(map[string]struct{ additions, deletions int })
75
76 // Parse numstat output
77 if numstatOutput != "" {
78 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(numstatOutput)))
79 for scanner.Scan() {
80 line := scanner.Text()
81 // Format: additions\tdeletions\tfilename
82 // Example: 5\t3\tpath/to/file.go
83 parts := strings.Split(line, "\t")
84 if len(parts) >= 3 {
85 additions := 0
86 deletions := 0
87
88 // Handle binary files (marked with "-")
89 if parts[0] != "-" {
90 if add, err := fmt.Sscanf(parts[0], "%d", &additions); err != nil || add != 1 {
91 additions = 0
92 }
93 }
94 if parts[1] != "-" {
95 if del, err := fmt.Sscanf(parts[1], "%d", &deletions); err != nil || del != 1 {
96 deletions = 0
97 }
98 }
99
100 filePath := strings.Join(parts[2:], "\t") // Handle filenames with tabs
101 numstatMap[filePath] = struct{ additions, deletions int }{additions, deletions}
102 }
103 }
104 }
105
106 // Merge numstat data into files
107 for i := range files {
108 if stats, found := numstatMap[files[i].Path]; found {
109 files[i].Additions = stats.additions
110 files[i].Deletions = stats.deletions
111 }
112 }
113
114 return files, nil
115}
116
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000117// parseRawDiff converts git diff --raw output into structured format
118func parseRawDiff(diffOutput string) ([]DiffFile, error) {
119 var files []DiffFile
120 if diffOutput == "" {
121 return files, nil
122 }
123
124 // Process diff output line by line
125 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(diffOutput)))
126 for scanner.Scan() {
127 line := scanner.Text()
128 // Format: :oldmode newmode oldhash newhash status\tpath
129 // Example: :000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A git_tools/git_tools.go
130 if !strings.HasPrefix(line, ":") {
131 continue
132 }
133
134 parts := strings.Fields(line[1:]) // Skip the leading colon
135 if len(parts) < 5 {
136 continue // Not enough parts, skip this line
137 }
138
139 oldMode := parts[0]
140 newMode := parts[1]
141 oldHash := parts[2]
142 newHash := parts[3]
143 status := parts[4]
144
145 // The path is everything after the status character and tab
146 pathIndex := strings.Index(line, status) + len(status) + 1 // +1 for the tab
147 path := ""
148 if pathIndex < len(line) {
149 path = strings.TrimSpace(line[pathIndex:])
150 }
151
152 files = append(files, DiffFile{
Philip Zeyligere89b3082025-05-29 03:16:06 +0000153 Path: path,
154 OldMode: oldMode,
155 NewMode: newMode,
156 OldHash: oldHash,
157 NewHash: newHash,
158 Status: status,
159 Additions: 0, // Will be filled by numstat data
160 Deletions: 0, // Will be filled by numstat data
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000161 })
162 }
163
164 return files, nil
165}
166
// GitLogEntry is one commit as listed by the git log.
type GitLogEntry struct {
	// Hash is the full commit hash.
	Hash string `json:"hash"`
	// Refs lists the references (branches, tags) pointing to this commit.
	Refs []string `json:"refs"`
	// Subject is the commit subject/message.
	Subject string `json:"subject"`
}
173
174// GitRecentLog returns the recent commit log between the initial commit and HEAD
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700175func GitRecentLog(repoDir string, initialCommitHash string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000176 // Validate input
177 if initialCommitHash == "" {
178 return nil, fmt.Errorf("initial commit hash must be provided")
179 }
180
181 // Find merge-base of HEAD and initial commit
182 cmdMergeBase := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash)
183 mergeBase, err := cmdMergeBase.CombinedOutput()
184 if err != nil {
185 // If merge-base fails (which can happen in simple repos), use initialCommitHash
186 return getGitLog(repoDir, initialCommitHash)
187 }
188
189 mergeBaseHash := strings.TrimSpace(string(mergeBase))
190 if mergeBaseHash == "" {
191 // If merge-base doesn't return a valid hash, use initialCommitHash
192 return getGitLog(repoDir, initialCommitHash)
193 }
194
195 // Use the merge-base as the 'from' point
196 return getGitLog(repoDir, mergeBaseHash)
197}
198
199// getGitLog gets the git log with the specified format using the provided fromCommit
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700200func getGitLog(repoDir string, fromCommit string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000201 // Check if fromCommit~10 exists (10 commits before fromCommit)
202 checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+"~10")
203 if err := checkCmd.Run(); err != nil {
204 // If fromCommit~10 doesn't exist, use just fromCommit..HEAD as the range
205 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"..HEAD")
206 out, err := cmd.CombinedOutput()
207 if err != nil {
208 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
209 }
210 return parseGitLog(string(out))
211 }
212
213 // Use fromCommit~10..HEAD range with the specified format for easy parsing
214 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"~10..HEAD")
215 out, err := cmd.CombinedOutput()
216 if err != nil {
217 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
218 }
219
220 return parseGitLog(string(out))
221}
222
223// parseGitLog parses the output of git log with null-separated fields
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700224func parseGitLog(logOutput string) ([]GitLogEntry, error) {
225 var entries []GitLogEntry
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000226 if logOutput == "" {
227 return entries, nil
228 }
229
230 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
231 for scanner.Scan() {
232 line := scanner.Text()
233 parts := strings.Split(line, "\x00")
234 if len(parts) != 3 {
235 continue // Skip malformed lines
236 }
237
238 hash := parts[0]
239 subject := parts[1]
240 decoration := parts[2]
241
242 // Parse the refs from the decoration
243 refs := parseRefs(decoration)
244
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700245 entries = append(entries, GitLogEntry{
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000246 Hash: hash,
247 Refs: refs,
248 Subject: subject,
249 })
250 }
251
252 return entries, nil
253}
254
// parseRefs extracts reference names from a git decoration string.
//
// The decoration produced by %d looks like
//
//	(HEAD -> main, origin/main, tag: v1.0.0)
//
// Surrounding whitespace and parentheses are removed, the remainder is split
// on ", ", and each item is reduced to a bare name: "HEAD -> branch" becomes
// "branch", "tag: name" becomes "name", anything else (including a lone
// "HEAD") is kept as is. Returns nil when there is nothing to report.
func parseRefs(decoration string) []string {
	if decoration == "" {
		return nil
	}

	inner := strings.TrimPrefix(strings.TrimSpace(decoration), "(")
	inner = strings.TrimSuffix(inner, ")")
	// A second pass also drops a ") " that the first trim may have exposed.
	inner = strings.TrimSuffix(inner, ") ")
	if inner == "" {
		return nil
	}

	var refs []string
	for _, item := range strings.Split(inner, ", ") {
		item = strings.TrimSpace(item)
		switch {
		case item == "":
			// nothing between separators
		case strings.HasPrefix(item, "HEAD -> "):
			refs = append(refs, item[len("HEAD -> "):])
		case strings.HasPrefix(item, "tag: "):
			refs = append(refs, item[len("tag: "):])
		default:
			// plain branch name, or a detached "HEAD"
			refs = append(refs, item)
		}
	}

	return refs
}
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700308
// validateRepoPath verifies that a file is tracked by git and within the
// repository boundaries. It returns filepath.Join(repoDir, filePath) if valid.
//
// Two checks are performed:
//  1. git ls-files --error-unmatch must succeed for filePath inside repoDir.
//  2. The absolute form of the joined path must lie below the absolute form of
//     repoDir, which rejects directory traversal such as ../../../etc/passwd.
//
// NOTE(review): the second check is purely lexical (filepath.Abs); symbolic
// links are not resolved, so a tracked symlink pointing outside the repository
// would still pass — confirm whether that needs to be rejected.
func validateRepoPath(repoDir, filePath string) (string, error) {
	// filePath is untrusted. "--" stops git from reading a value that starts
	// with '-' as an option, and --literal-pathspecs makes git treat it as a
	// literal path instead of a pathspec pattern, so wildcard or ":(magic)"
	// input cannot pass validation by matching some other tracked file.
	cmd := exec.Command("git", "--literal-pathspecs", "-C", repoDir, "ls-files", "--error-unmatch", "--", filePath)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("file not tracked by git or outside repository: %s", filePath)
	}

	fullPath := filepath.Join(repoDir, filePath)

	absRepoDir, err := filepath.Abs(repoDir)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
	}

	absFilePath, err := filepath.Abs(fullPath)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute file path: %w", err)
	}

	// Require the separator after the repository path so that a sibling
	// directory sharing the same name prefix is not accepted.
	if !strings.HasPrefix(absFilePath, absRepoDir+string(filepath.Separator)) {
		return "", fmt.Errorf("file path outside repository: %s", filePath)
	}

	return fullPath, nil
}
341
342// GitCat returns the contents of a file in the repository at the given path
343// This is used to get the current working copy of a file (not using git show)
344func GitCat(repoDir, filePath string) (string, error) {
345 fullPath, err := validateRepoPath(repoDir, filePath)
346 if err != nil {
347 return "", err
348 }
349
350 // Read the file
351 content, err := os.ReadFile(fullPath)
352 if err != nil {
353 return "", fmt.Errorf("error reading file %s: %w", filePath, err)
354 }
355
356 return string(content), nil
357}
358
359// GitSaveFile saves content to a file in the repository, checking first that it's tracked by git
360// This prevents writing to files outside the repository
361func GitSaveFile(repoDir, filePath, content string) error {
362 fullPath, err := validateRepoPath(repoDir, filePath)
363 if err != nil {
364 return err
365 }
366
367 // Write the content to the file
Autoformatter8c463622025-05-16 21:54:17 +0000368 err = os.WriteFile(fullPath, []byte(content), 0o644)
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700369 if err != nil {
370 return fmt.Errorf("error writing to file %s: %w", filePath, err)
371 }
372
373 return nil
374}
Philip Zeyliger75bd37d2025-05-22 18:49:14 +0000375
// AutoCommitDiffViewChanges automatically commits changes to the specified file.
//
// If the subject of the last commit is exactly "User changes from diff view.",
// that commit is amended; otherwise a new commit with that message is created.
// All git commands run with repoDir as their working directory and are bound
// to ctx.
//
// NOTE(review): the amend path runs git commit without a path argument, while
// the new-commit path is restricted to filePath — confirm whether amending
// should also be limited to filePath.
//
// An error, including git's output, is returned if staging or committing fails.
func AutoCommitDiffViewChanges(ctx context.Context, repoDir, filePath string) error {
	const expectedMsg = "User changes from diff view."

	// A failure to read the last commit subject is not fatal: it simply means
	// a new commit is created instead of amending.
	logCmd := exec.CommandContext(ctx, "git", "log", "-1", "--pretty=%s")
	logCmd.Dir = repoDir
	subject, err := logCmd.Output()
	amend := err == nil && strings.TrimSpace(string(subject)) == expectedMsg

	// "--" keeps a filePath that begins with '-' from being read as an option.
	addCmd := exec.CommandContext(ctx, "git", "add", "--", filePath)
	addCmd.Dir = repoDir
	if out, err := addCmd.CombinedOutput(); err != nil {
		return fmt.Errorf("error adding file to git: %w - %s", err, string(out))
	}

	commitArgs := []string{"commit", "-m", expectedMsg, "--", filePath}
	if amend {
		commitArgs = []string{"commit", "--amend", "--no-edit"}
	}
	commitCmd := exec.CommandContext(ctx, "git", commitArgs...)
	commitCmd.Dir = repoDir
	if out, err := commitCmd.CombinedOutput(); err != nil {
		return fmt.Errorf("error committing changes: %w - %s", err, string(out))
	}

	return nil
}