blob: a91071b04dbd6df7b0805c6a96d4c7dd3ec5ddf8 [file] [log] [blame]
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00001// Package git_tools provides utilities for interacting with Git repositories.
2package git_tools
3
4import (
5 "bufio"
Josh Bleecher Snydera8561f72025-07-15 23:47:59 +00006 "bytes"
Philip Zeyliger75bd37d2025-05-22 18:49:14 +00007 "context"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00008 "fmt"
Philip Zeyliger272a90e2025-05-16 14:49:51 -07009 "os"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000010 "os/exec"
Philip Zeyliger272a90e2025-05-16 14:49:51 -070011 "path/filepath"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000012 "strings"
13)
14
// DiffFile represents a file in a Git diff.
//
// The mode, hash, status and path fields are filled from `git diff --raw`
// output; Additions and Deletions are merged in afterwards from
// `git diff --numstat` output.
type DiffFile struct {
	Path      string `json:"path"`      // Path of the file; for renames and copies this is the new path
	OldPath   string `json:"old_path"`  // Original path for renames and copies; empty for regular changes
	OldMode   string `json:"old_mode"`  // First mode column of the raw diff line
	NewMode   string `json:"new_mode"`  // Second mode column of the raw diff line
	OldHash   string `json:"old_hash"`  // First object hash column of the raw diff line
	NewHash   string `json:"new_hash"`  // Second object hash column of the raw diff line
	Status    string `json:"status"`    // A=added, M=modified, D=deleted, R=renamed, C=copied; kept exactly as git printed it (e.g. R100)
	Additions int    `json:"additions"` // Number of lines added; 0 when numstat reports "-" (binary) or has no entry
	Deletions int    `json:"deletions"` // Number of lines deleted; 0 when numstat reports "-" (binary) or has no entry
}
27
28// GitRawDiff returns a structured representation of the Git diff between two commits or references
Philip Zeyliger272a90e2025-05-16 14:49:51 -070029// If 'to' is empty, it will show unstaged changes (diff with working directory)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000030func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000031 // Git command to generate the diff in raw format with full hashes and rename/copy detection
32 // --find-copies-harder enables more aggressive copy detection
Philip Zeyligere89b3082025-05-29 03:16:06 +000033 var rawCmd, numstatCmd *exec.Cmd
Philip Zeyliger272a90e2025-05-16 14:49:51 -070034 if to == "" {
35 // If 'to' is empty, show unstaged changes
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000036 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from)
Philip Zeyligere89b3082025-05-29 03:16:06 +000037 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070038 } else {
39 // Normal diff between two refs
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000040 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from, to)
Philip Zeyligere89b3082025-05-29 03:16:06 +000041 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from, to)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070042 }
43
Philip Zeyligere89b3082025-05-29 03:16:06 +000044 // Execute raw diff command
45 rawOut, err := rawCmd.CombinedOutput()
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000046 if err != nil {
Philip Zeyligere89b3082025-05-29 03:16:06 +000047 return nil, fmt.Errorf("error executing git diff --raw: %w - %s", err, string(rawOut))
48 }
49
50 // Execute numstat command
51 numstatOut, err := numstatCmd.CombinedOutput()
52 if err != nil {
53 return nil, fmt.Errorf("error executing git diff --numstat: %w - %s", err, string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000054 }
55
56 // Parse the raw diff output into structured format
Philip Zeyligere89b3082025-05-29 03:16:06 +000057 return parseRawDiffWithNumstat(string(rawOut), string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000058}
59
// GitShow returns the result of git show for a specific commit hash.
//
// The returned text is git's combined stdout and stderr. When the command
// fails, an empty string is returned together with an error that wraps the
// failure and includes that output.
func GitShow(repoDir, hash string) (string, error) {
	output, err := exec.Command("git", "-C", repoDir, "show", hash).CombinedOutput()
	text := string(output)
	if err != nil {
		return "", fmt.Errorf("error executing git show: %w - %s", err, text)
	}
	return text, nil
}
69
Philip Zeyligere89b3082025-05-29 03:16:06 +000070// parseRawDiffWithNumstat converts git diff --raw and --numstat output into structured format
71func parseRawDiffWithNumstat(rawOutput, numstatOutput string) ([]DiffFile, error) {
72 // First parse the raw diff to get the base file information
73 files, err := parseRawDiff(rawOutput)
74 if err != nil {
75 return nil, err
76 }
77
78 // Create a map to store numstat data by file path
79 numstatMap := make(map[string]struct{ additions, deletions int })
80
81 // Parse numstat output
82 if numstatOutput != "" {
83 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(numstatOutput)))
84 for scanner.Scan() {
85 line := scanner.Text()
86 // Format: additions\tdeletions\tfilename
87 // Example: 5\t3\tpath/to/file.go
88 parts := strings.Split(line, "\t")
89 if len(parts) >= 3 {
90 additions := 0
91 deletions := 0
92
93 // Handle binary files (marked with "-")
94 if parts[0] != "-" {
95 if add, err := fmt.Sscanf(parts[0], "%d", &additions); err != nil || add != 1 {
96 additions = 0
97 }
98 }
99 if parts[1] != "-" {
100 if del, err := fmt.Sscanf(parts[1], "%d", &deletions); err != nil || del != 1 {
101 deletions = 0
102 }
103 }
104
105 filePath := strings.Join(parts[2:], "\t") // Handle filenames with tabs
106 numstatMap[filePath] = struct{ additions, deletions int }{additions, deletions}
107 }
108 }
109 }
110
111 // Merge numstat data into files
112 for i := range files {
113 if stats, found := numstatMap[files[i].Path]; found {
114 files[i].Additions = stats.additions
115 files[i].Deletions = stats.deletions
116 }
117 }
118
119 return files, nil
120}
121
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000122// parseRawDiff converts git diff --raw output into structured format
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000123// Handles both regular changes and rename/copy operations
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000124func parseRawDiff(diffOutput string) ([]DiffFile, error) {
125 var files []DiffFile
126 if diffOutput == "" {
127 return files, nil
128 }
129
130 // Process diff output line by line
131 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(diffOutput)))
132 for scanner.Scan() {
133 line := scanner.Text()
134 // Format: :oldmode newmode oldhash newhash status\tpath
135 // Example: :000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A git_tools/git_tools.go
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000136 // For renames: :100644 100644 oldHash newHash R100\told_path\tnew_path
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000137 if !strings.HasPrefix(line, ":") {
138 continue
139 }
140
141 parts := strings.Fields(line[1:]) // Skip the leading colon
142 if len(parts) < 5 {
143 continue // Not enough parts, skip this line
144 }
145
146 oldMode := parts[0]
147 newMode := parts[1]
148 oldHash := parts[2]
149 newHash := parts[3]
150 status := parts[4]
151
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000152 // Find the tab after the status field
153 tabIndex := strings.Index(line, "\t")
154 if tabIndex == -1 {
155 continue // No tab found, malformed line
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000156 }
157
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000158 // Extract paths after the tab
159 pathPart := line[tabIndex+1:]
160
161 // Handle rename/copy operations (status starts with R or C)
162 if strings.HasPrefix(status, "R") || strings.HasPrefix(status, "C") {
163 // For renames/copies, the path part contains: old_path\tnew_path
164 pathParts := strings.Split(pathPart, "\t")
165 if len(pathParts) == 2 {
166 // Preserve rename/copy as a single entry with both paths
167 oldPath := pathParts[0]
168 newPath := pathParts[1]
169
170 files = append(files, DiffFile{
171 Path: newPath, // New path as primary path
172 OldPath: oldPath, // Original path for rename/copy
173 OldMode: oldMode,
174 NewMode: newMode,
175 OldHash: oldHash,
176 NewHash: newHash,
177 Status: status, // Preserve original R* or C* status
178 })
179 } else {
180 // Malformed rename, treat as regular change
181 files = append(files, DiffFile{
182 Path: pathPart,
183 OldPath: "",
184 OldMode: oldMode,
185 NewMode: newMode,
186 OldHash: oldHash,
187 NewHash: newHash,
188 Status: status,
189 })
190 }
191 } else {
192 // Regular change (A, M, D)
193 files = append(files, DiffFile{
194 Path: pathPart,
195 OldPath: "", // No old path for regular changes
196 OldMode: oldMode,
197 NewMode: newMode,
198 OldHash: oldHash,
199 NewHash: newHash,
200 Status: status,
201 })
202 }
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000203 }
204
205 return files, nil
206}
207
// GitLogEntry represents a single entry in the git log.
//
// Entries are produced by parseGitLog from log lines of the form
// hash, subject and decoration separated by NUL bytes.
type GitLogEntry struct {
	Hash    string   `json:"hash"`    // The full commit hash
	Refs    []string `json:"refs"`    // References (branches, tags) pointing to this commit, as extracted by parseRefs; nil when the commit has no decoration
	Subject string   `json:"subject"` // The commit subject/message
}
214
215// GitRecentLog returns the recent commit log between the initial commit and HEAD
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700216func GitRecentLog(repoDir string, initialCommitHash string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000217 // Validate input
218 if initialCommitHash == "" {
219 return nil, fmt.Errorf("initial commit hash must be provided")
220 }
221
222 // Find merge-base of HEAD and initial commit
223 cmdMergeBase := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash)
224 mergeBase, err := cmdMergeBase.CombinedOutput()
225 if err != nil {
226 // If merge-base fails (which can happen in simple repos), use initialCommitHash
227 return getGitLog(repoDir, initialCommitHash)
228 }
229
230 mergeBaseHash := strings.TrimSpace(string(mergeBase))
231 if mergeBaseHash == "" {
232 // If merge-base doesn't return a valid hash, use initialCommitHash
233 return getGitLog(repoDir, initialCommitHash)
234 }
235
236 // Use the merge-base as the 'from' point
237 return getGitLog(repoDir, mergeBaseHash)
238}
239
240// getGitLog gets the git log with the specified format using the provided fromCommit
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700241func getGitLog(repoDir string, fromCommit string) ([]GitLogEntry, error) {
Philip Zeyliger20372412025-06-05 02:12:10 +0000242 // Try to find the best commit range, starting from 10 commits back and working down to 0
243 var fromRange string
244 for i := 10; i >= 0; i-- {
245 if i == 0 {
246 // Use just fromCommit..HEAD as the range (no offset)
247 fromRange = fromCommit + "..HEAD"
248 break
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000249 }
Philip Zeyliger20372412025-06-05 02:12:10 +0000250
251 // Check if fromCommit~i exists
252 checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+fmt.Sprintf("~%d", i))
253 if err := checkCmd.Run(); err == nil {
254 // This offset works, use it
255 fromRange = fromCommit + fmt.Sprintf("~%d..HEAD", i)
256 break
257 }
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000258 }
259
Philip Zeyliger20372412025-06-05 02:12:10 +0000260 // Use the determined range with the specified format for easy parsing
Philip Zeyligerf18aafd2025-07-21 15:15:54 -0700261 cmd := exec.Command("git", "-C", repoDir, "log", "--boundary", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromRange)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000262 out, err := cmd.CombinedOutput()
263 if err != nil {
264 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
265 }
266
267 return parseGitLog(string(out))
268}
269
270// parseGitLog parses the output of git log with null-separated fields
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700271func parseGitLog(logOutput string) ([]GitLogEntry, error) {
272 var entries []GitLogEntry
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000273 if logOutput == "" {
274 return entries, nil
275 }
276
277 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
278 for scanner.Scan() {
279 line := scanner.Text()
280 parts := strings.Split(line, "\x00")
281 if len(parts) != 3 {
282 continue // Skip malformed lines
283 }
284
285 hash := parts[0]
286 subject := parts[1]
287 decoration := parts[2]
288
289 // Parse the refs from the decoration
290 refs := parseRefs(decoration)
291
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700292 entries = append(entries, GitLogEntry{
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000293 Hash: hash,
294 Refs: refs,
295 Subject: subject,
296 })
297 }
298
299 return entries, nil
300}
301
// parseRefs extracts references from git decoration format.
//
// The decoration produced by %d looks like
//
//	(HEAD -> main, origin/main, tag: v1.0.0)
//
// The surrounding parentheses are removed and the remainder is split on ", ".
// A leading "HEAD -> " or "tag: " is stripped from an item (only one of the
// two, checked in that order); every other item, including a bare "HEAD", is
// kept as is. Empty items are dropped. The result is nil when no refs remain.
func parseRefs(decoration string) []string {
	// Strip whitespace and the enclosing parentheses.
	body := strings.TrimSpace(decoration)
	body = strings.TrimPrefix(body, "(")
	body = strings.TrimSuffix(body, ")")
	body = strings.TrimSuffix(body, ") ")
	if body == "" {
		return nil
	}

	var names []string
	for _, item := range strings.Split(body, ", ") {
		name := strings.TrimSpace(item)
		if name == "" {
			continue
		}

		if branch, ok := strings.CutPrefix(name, "HEAD -> "); ok {
			// "HEAD -> branch": report the branch HEAD points at.
			name = branch
		} else if tag, ok := strings.CutPrefix(name, "tag: "); ok {
			// "tag: name": report the bare tag name.
			name = tag
		}
		names = append(names, name)
	}

	return names
}
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700355
// validateRepoPath verifies that a file is tracked by git and within the repository boundaries.
// Returns the full path to the file (repoDir joined with filePath) if valid.
//
// Two checks are made:
//  1. git ls-files --error-unmatch must succeed for filePath. The path is
//     passed after a "--" separator so that a value starting with '-' is
//     treated as a path and can never be interpreted as a git option.
//  2. The absolute form of the joined path must lie below the absolute
//     repository directory, which rejects traversal such as ../../../etc/passwd.
//
// NOTE(review): the boundary check is purely lexical (filepath.Abs does not
// resolve symlinks), so a tracked symlink pointing outside the repository
// would still pass — confirm whether callers need to guard against that.
func validateRepoPath(repoDir, filePath string) (string, error) {
	// First verify that the requested file is tracked by git to prevent
	// access to files outside the repository.
	cmd := exec.Command("git", "-C", repoDir, "ls-files", "--error-unmatch", "--", filePath)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("file not tracked by git or outside repository: %s", filePath)
	}

	// Construct the full file path.
	fullPath := filepath.Join(repoDir, filePath)

	// Validate that the resolved path is still within the repository directory
	// to prevent directory traversal attacks (e.g., ../../../etc/passwd).
	absRepoDir, err := filepath.Abs(repoDir)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
	}

	absFilePath, err := filepath.Abs(fullPath)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute file path: %w", err)
	}

	// The trailing separator makes sure a sibling directory sharing the
	// repository's name as a prefix is not accepted.
	if !strings.HasPrefix(absFilePath, absRepoDir+string(filepath.Separator)) {
		return "", fmt.Errorf("file path outside repository: %s", filePath)
	}

	return fullPath, nil
}
388
389// GitCat returns the contents of a file in the repository at the given path
390// This is used to get the current working copy of a file (not using git show)
391func GitCat(repoDir, filePath string) (string, error) {
392 fullPath, err := validateRepoPath(repoDir, filePath)
393 if err != nil {
394 return "", err
395 }
396
397 // Read the file
398 content, err := os.ReadFile(fullPath)
399 if err != nil {
400 return "", fmt.Errorf("error reading file %s: %w", filePath, err)
401 }
402
403 return string(content), nil
404}
405
406// GitSaveFile saves content to a file in the repository, checking first that it's tracked by git
407// This prevents writing to files outside the repository
408func GitSaveFile(repoDir, filePath, content string) error {
409 fullPath, err := validateRepoPath(repoDir, filePath)
410 if err != nil {
411 return err
412 }
413
414 // Write the content to the file
Autoformatter8c463622025-05-16 21:54:17 +0000415 err = os.WriteFile(fullPath, []byte(content), 0o644)
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700416 if err != nil {
417 return fmt.Errorf("error writing to file %s: %w", filePath, err)
418 }
419
420 return nil
421}
Philip Zeyliger75bd37d2025-05-22 18:49:14 +0000422
// AutoCommitDiffViewChanges automatically commits changes to the specified file.
// If the last commit message is exactly "User changes from diff view.", it amends the commit.
// Otherwise, it creates a new commit.
//
// Both git commands run with repoDir as their working directory and are bound
// to ctx. A failure to read the last commit subject is not an error: it simply
// means a new commit is created. A failing git commit is returned as an error
// that includes git's output.
func AutoCommitDiffViewChanges(ctx context.Context, repoDir, filePath string) error {
	const expectedMsg = "User changes from diff view."

	// Look up the subject of the most recent commit.
	subjectCmd := exec.CommandContext(ctx, "git", "log", "-1", "--pretty=%s")
	subjectCmd.Dir = repoDir
	subject, logErr := subjectCmd.Output()

	// Instead of calling git add first, git commit is given a pathspec, which
	// works the same, but would fail if the file isn't tracked by git already.
	commitArgs := []string{"commit", "-m", expectedMsg, "--", filePath}
	if logErr == nil && strings.TrimSpace(string(subject)) == expectedMsg {
		// The previous commit is already an auto-commit: fold into it.
		commitArgs = []string{"commit", "--amend", "--no-edit", "--", filePath}
	}

	commitCmd := exec.CommandContext(ctx, "git", commitArgs...)
	commitCmd.Dir = repoDir

	if out, err := commitCmd.CombinedOutput(); err != nil {
		return fmt.Errorf("error committing changes: %w - git output: %s", err, string(out))
	}

	return nil
}
Josh Bleecher Snydera8561f72025-07-15 23:47:59 +0000455
456// GitGetUntrackedFiles returns a list of untracked files in the repository
457func GitGetUntrackedFiles(repoDir string) ([]string, error) {
458 cmd := exec.Command("git", "-C", repoDir, "ls-files", "--others", "--exclude-standard", "-z")
459 output, err := cmd.CombinedOutput()
460 if err != nil {
461 return nil, fmt.Errorf("error executing git ls-files: %w - %s", err, string(output))
462 }
463 var result []string
464 for path := range bytes.SplitSeq(output, []byte{0}) {
465 path = bytes.TrimSpace(path)
466 if len(path) == 0 {
467 continue
468 }
469 result = append(result, string(path))
470 }
471 return result, nil
472}