blob: 6dddffb516161070c613bdd87c84c8bd82c4bef7 [file] [log] [blame]
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00001// Package git_tools provides utilities for interacting with Git repositories.
2package git_tools
3
4import (
5 "bufio"
Philip Zeyliger75bd37d2025-05-22 18:49:14 +00006 "context"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00007 "fmt"
Philip Zeyliger272a90e2025-05-16 14:49:51 -07008 "os"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00009 "os/exec"
Philip Zeyliger272a90e2025-05-16 14:49:51 -070010 "path/filepath"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000011 "strings"
12)
13
// DiffFile represents a single file entry in a Git diff. The path, mode, hash
// and status fields come from `git diff --raw`; Additions and Deletions are
// merged in from `git diff --numstat`.
type DiffFile struct {
	Path      string `json:"path"`      // Path of the file; for renames and copies this is the new path
	OldPath   string `json:"old_path"`  // Original path for renames and copies, empty otherwise
	OldMode   string `json:"old_mode"`  // File mode before the change, as printed by git (e.g. 100644)
	NewMode   string `json:"new_mode"`  // File mode after the change
	OldHash   string `json:"old_hash"`  // Object hash before the change
	NewHash   string `json:"new_hash"`  // Object hash after the change
	Status    string `json:"status"`    // A=added, M=modified, D=deleted, R=renamed, C=copied (R/C keep git's score suffix, e.g. R100)
	Additions int    `json:"additions"` // Number of lines added (0 when numstat reports "-", i.e. binary files)
	Deletions int    `json:"deletions"` // Number of lines deleted (0 when numstat reports "-", i.e. binary files)
}
26
27// GitRawDiff returns a structured representation of the Git diff between two commits or references
Philip Zeyliger272a90e2025-05-16 14:49:51 -070028// If 'to' is empty, it will show unstaged changes (diff with working directory)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000029func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000030 // Git command to generate the diff in raw format with full hashes and rename/copy detection
31 // --find-copies-harder enables more aggressive copy detection
Philip Zeyligere89b3082025-05-29 03:16:06 +000032 var rawCmd, numstatCmd *exec.Cmd
Philip Zeyliger272a90e2025-05-16 14:49:51 -070033 if to == "" {
34 // If 'to' is empty, show unstaged changes
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000035 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from)
Philip Zeyligere89b3082025-05-29 03:16:06 +000036 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070037 } else {
38 // Normal diff between two refs
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000039 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from, to)
Philip Zeyligere89b3082025-05-29 03:16:06 +000040 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from, to)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070041 }
42
Philip Zeyligere89b3082025-05-29 03:16:06 +000043 // Execute raw diff command
44 rawOut, err := rawCmd.CombinedOutput()
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000045 if err != nil {
Philip Zeyligere89b3082025-05-29 03:16:06 +000046 return nil, fmt.Errorf("error executing git diff --raw: %w - %s", err, string(rawOut))
47 }
48
49 // Execute numstat command
50 numstatOut, err := numstatCmd.CombinedOutput()
51 if err != nil {
52 return nil, fmt.Errorf("error executing git diff --numstat: %w - %s", err, string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000053 }
54
55 // Parse the raw diff output into structured format
Philip Zeyligere89b3082025-05-29 03:16:06 +000056 return parseRawDiffWithNumstat(string(rawOut), string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000057}
58
// GitShow returns the output of `git show` for the given commit hash in the
// repository at repoDir.
//
// Only git's standard output is returned, so warnings that git prints to
// stderr are never mixed into the result. If the command fails, the returned
// string is empty and the error wraps the underlying error together with the
// captured stderr text.
func GitShow(repoDir, hash string) (string, error) {
	var stderr strings.Builder
	cmd := exec.Command("git", "-C", repoDir, "show", hash)
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("error executing git show: %w - %s", err, stderr.String())
	}
	return string(out), nil
}
68
Philip Zeyligere89b3082025-05-29 03:16:06 +000069// parseRawDiffWithNumstat converts git diff --raw and --numstat output into structured format
70func parseRawDiffWithNumstat(rawOutput, numstatOutput string) ([]DiffFile, error) {
71 // First parse the raw diff to get the base file information
72 files, err := parseRawDiff(rawOutput)
73 if err != nil {
74 return nil, err
75 }
76
77 // Create a map to store numstat data by file path
78 numstatMap := make(map[string]struct{ additions, deletions int })
79
80 // Parse numstat output
81 if numstatOutput != "" {
82 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(numstatOutput)))
83 for scanner.Scan() {
84 line := scanner.Text()
85 // Format: additions\tdeletions\tfilename
86 // Example: 5\t3\tpath/to/file.go
87 parts := strings.Split(line, "\t")
88 if len(parts) >= 3 {
89 additions := 0
90 deletions := 0
91
92 // Handle binary files (marked with "-")
93 if parts[0] != "-" {
94 if add, err := fmt.Sscanf(parts[0], "%d", &additions); err != nil || add != 1 {
95 additions = 0
96 }
97 }
98 if parts[1] != "-" {
99 if del, err := fmt.Sscanf(parts[1], "%d", &deletions); err != nil || del != 1 {
100 deletions = 0
101 }
102 }
103
104 filePath := strings.Join(parts[2:], "\t") // Handle filenames with tabs
105 numstatMap[filePath] = struct{ additions, deletions int }{additions, deletions}
106 }
107 }
108 }
109
110 // Merge numstat data into files
111 for i := range files {
112 if stats, found := numstatMap[files[i].Path]; found {
113 files[i].Additions = stats.additions
114 files[i].Deletions = stats.deletions
115 }
116 }
117
118 return files, nil
119}
120
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000121// parseRawDiff converts git diff --raw output into structured format
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000122// Handles both regular changes and rename/copy operations
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000123func parseRawDiff(diffOutput string) ([]DiffFile, error) {
124 var files []DiffFile
125 if diffOutput == "" {
126 return files, nil
127 }
128
129 // Process diff output line by line
130 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(diffOutput)))
131 for scanner.Scan() {
132 line := scanner.Text()
133 // Format: :oldmode newmode oldhash newhash status\tpath
134 // Example: :000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A git_tools/git_tools.go
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000135 // For renames: :100644 100644 oldHash newHash R100\told_path\tnew_path
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000136 if !strings.HasPrefix(line, ":") {
137 continue
138 }
139
140 parts := strings.Fields(line[1:]) // Skip the leading colon
141 if len(parts) < 5 {
142 continue // Not enough parts, skip this line
143 }
144
145 oldMode := parts[0]
146 newMode := parts[1]
147 oldHash := parts[2]
148 newHash := parts[3]
149 status := parts[4]
150
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000151 // Find the tab after the status field
152 tabIndex := strings.Index(line, "\t")
153 if tabIndex == -1 {
154 continue // No tab found, malformed line
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000155 }
156
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000157 // Extract paths after the tab
158 pathPart := line[tabIndex+1:]
159
160 // Handle rename/copy operations (status starts with R or C)
161 if strings.HasPrefix(status, "R") || strings.HasPrefix(status, "C") {
162 // For renames/copies, the path part contains: old_path\tnew_path
163 pathParts := strings.Split(pathPart, "\t")
164 if len(pathParts) == 2 {
165 // Preserve rename/copy as a single entry with both paths
166 oldPath := pathParts[0]
167 newPath := pathParts[1]
168
169 files = append(files, DiffFile{
170 Path: newPath, // New path as primary path
171 OldPath: oldPath, // Original path for rename/copy
172 OldMode: oldMode,
173 NewMode: newMode,
174 OldHash: oldHash,
175 NewHash: newHash,
176 Status: status, // Preserve original R* or C* status
177 })
178 } else {
179 // Malformed rename, treat as regular change
180 files = append(files, DiffFile{
181 Path: pathPart,
182 OldPath: "",
183 OldMode: oldMode,
184 NewMode: newMode,
185 OldHash: oldHash,
186 NewHash: newHash,
187 Status: status,
188 })
189 }
190 } else {
191 // Regular change (A, M, D)
192 files = append(files, DiffFile{
193 Path: pathPart,
194 OldPath: "", // No old path for regular changes
195 OldMode: oldMode,
196 NewMode: newMode,
197 OldHash: oldHash,
198 NewHash: newHash,
199 Status: status,
200 })
201 }
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000202 }
203
204 return files, nil
205}
206
// GitLogEntry represents a single commit in the git log, as produced by
// parsing `git log` output formatted with %H (hash), %s (subject) and
// %d (decorations).
type GitLogEntry struct {
	Hash    string   `json:"hash"`    // The full commit hash
	Refs    []string `json:"refs"`    // References (branches, tags) pointing to this commit
	Subject string   `json:"subject"` // The commit subject/message
}
213
214// GitRecentLog returns the recent commit log between the initial commit and HEAD
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700215func GitRecentLog(repoDir string, initialCommitHash string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000216 // Validate input
217 if initialCommitHash == "" {
218 return nil, fmt.Errorf("initial commit hash must be provided")
219 }
220
221 // Find merge-base of HEAD and initial commit
222 cmdMergeBase := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash)
223 mergeBase, err := cmdMergeBase.CombinedOutput()
224 if err != nil {
225 // If merge-base fails (which can happen in simple repos), use initialCommitHash
226 return getGitLog(repoDir, initialCommitHash)
227 }
228
229 mergeBaseHash := strings.TrimSpace(string(mergeBase))
230 if mergeBaseHash == "" {
231 // If merge-base doesn't return a valid hash, use initialCommitHash
232 return getGitLog(repoDir, initialCommitHash)
233 }
234
235 // Use the merge-base as the 'from' point
236 return getGitLog(repoDir, mergeBaseHash)
237}
238
239// getGitLog gets the git log with the specified format using the provided fromCommit
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700240func getGitLog(repoDir string, fromCommit string) ([]GitLogEntry, error) {
Philip Zeyliger20372412025-06-05 02:12:10 +0000241 // Try to find the best commit range, starting from 10 commits back and working down to 0
242 var fromRange string
243 for i := 10; i >= 0; i-- {
244 if i == 0 {
245 // Use just fromCommit..HEAD as the range (no offset)
246 fromRange = fromCommit + "..HEAD"
247 break
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000248 }
Philip Zeyliger20372412025-06-05 02:12:10 +0000249
250 // Check if fromCommit~i exists
251 checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+fmt.Sprintf("~%d", i))
252 if err := checkCmd.Run(); err == nil {
253 // This offset works, use it
254 fromRange = fromCommit + fmt.Sprintf("~%d..HEAD", i)
255 break
256 }
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000257 }
258
Philip Zeyliger20372412025-06-05 02:12:10 +0000259 // Use the determined range with the specified format for easy parsing
260 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromRange)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000261 out, err := cmd.CombinedOutput()
262 if err != nil {
263 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
264 }
265
266 return parseGitLog(string(out))
267}
268
269// parseGitLog parses the output of git log with null-separated fields
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700270func parseGitLog(logOutput string) ([]GitLogEntry, error) {
271 var entries []GitLogEntry
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000272 if logOutput == "" {
273 return entries, nil
274 }
275
276 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
277 for scanner.Scan() {
278 line := scanner.Text()
279 parts := strings.Split(line, "\x00")
280 if len(parts) != 3 {
281 continue // Skip malformed lines
282 }
283
284 hash := parts[0]
285 subject := parts[1]
286 decoration := parts[2]
287
288 // Parse the refs from the decoration
289 refs := parseRefs(decoration)
290
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700291 entries = append(entries, GitLogEntry{
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000292 Hash: hash,
293 Refs: refs,
294 Subject: subject,
295 })
296 }
297
298 return entries, nil
299}
300
// parseRefs splits a git decoration string into individual reference names.
//
// The decoration produced by %d looks like
// " (HEAD -> main, origin/main, tag: v1.0.0)". Surrounding whitespace and
// parentheses are removed, the remainder is split on ", ", and each item has a
// leading "HEAD -> " or "tag: " marker stripped. Empty items are dropped. The
// result is nil when the decoration holds no references.
func parseRefs(decoration string) []string {
	if decoration == "" {
		return nil
	}

	// Peel off the surrounding whitespace and parentheses.
	inner := strings.TrimSpace(decoration)
	inner = strings.TrimPrefix(inner, "(")
	inner = strings.TrimSuffix(inner, ")")
	inner = strings.TrimSuffix(inner, ") ")
	if inner == "" {
		return nil
	}

	var refs []string
	for _, item := range strings.Split(inner, ", ") {
		name := strings.TrimSpace(item)
		switch {
		case name == "":
			continue
		case strings.HasPrefix(name, "HEAD -> "):
			// "HEAD -> branch": keep only the branch name.
			name = name[len("HEAD -> "):]
		case strings.HasPrefix(name, "tag: "):
			// "tag: v1": keep only the tag name.
			name = name[len("tag: "):]
		}
		// A bare "HEAD" and ordinary branch names are recorded unchanged.
		refs = append(refs, name)
	}

	return refs
}
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700354
// validateRepoPath verifies that filePath is tracked by git in the repository
// at repoDir and that it resolves to a location inside repoDir.
//
// It returns repoDir joined with filePath when both checks pass. An error is
// returned when `git ls-files --error-unmatch` does not succeed for filePath,
// when an absolute path cannot be computed, or when the cleaned absolute path
// does not lie below the repository directory.
func validateRepoPath(repoDir, filePath string) (string, error) {
	// First verify that the requested file is tracked by git. The "--" makes
	// git treat filePath strictly as a path: without it a value such as
	// "--others" would be parsed as an ls-files option and could let an
	// untracked name pass this check.
	cmd := exec.Command("git", "-C", repoDir, "ls-files", "--error-unmatch", "--", filePath)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("file not tracked by git or outside repository: %s", filePath)
	}

	// Construct the full file path
	fullPath := filepath.Join(repoDir, filePath)

	// Validate that the resolved path is still within the repository directory
	// to prevent directory traversal attacks (e.g., ../../../etc/passwd).
	// NOTE(review): filepath.Abs only cleans the path; symlinks are not
	// resolved, so a tracked symlink may still point outside the repository.
	absRepoDir, err := filepath.Abs(repoDir)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
	}

	absFilePath, err := filepath.Abs(fullPath)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute file path: %w", err)
	}

	// The separator is appended so that a sibling directory sharing the
	// repository's name as a prefix (repo vs. repo-other) is not accepted.
	if !strings.HasPrefix(absFilePath, absRepoDir+string(filepath.Separator)) {
		return "", fmt.Errorf("file path outside repository: %s", filePath)
	}

	return fullPath, nil
}
387
388// GitCat returns the contents of a file in the repository at the given path
389// This is used to get the current working copy of a file (not using git show)
390func GitCat(repoDir, filePath string) (string, error) {
391 fullPath, err := validateRepoPath(repoDir, filePath)
392 if err != nil {
393 return "", err
394 }
395
396 // Read the file
397 content, err := os.ReadFile(fullPath)
398 if err != nil {
399 return "", fmt.Errorf("error reading file %s: %w", filePath, err)
400 }
401
402 return string(content), nil
403}
404
405// GitSaveFile saves content to a file in the repository, checking first that it's tracked by git
406// This prevents writing to files outside the repository
407func GitSaveFile(repoDir, filePath, content string) error {
408 fullPath, err := validateRepoPath(repoDir, filePath)
409 if err != nil {
410 return err
411 }
412
413 // Write the content to the file
Autoformatter8c463622025-05-16 21:54:17 +0000414 err = os.WriteFile(fullPath, []byte(content), 0o644)
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700415 if err != nil {
416 return fmt.Errorf("error writing to file %s: %w", filePath, err)
417 }
418
419 return nil
420}
Philip Zeyliger75bd37d2025-05-22 18:49:14 +0000421
// AutoCommitDiffViewChanges stages and commits the changes made to filePath in
// the repository at repoDir.
//
// If the subject of the last commit is exactly "User changes from diff view.",
// that commit is amended; otherwise a new commit with that message is created.
// When the last subject cannot be read (the `git log` call fails), a new
// commit is created. In both cases the commit is limited to filePath, so
// changes staged for other files are left out of it.
//
// All git commands run with ctx and are cancelled with it. Errors wrap the
// underlying error and include git's output.
func AutoCommitDiffViewChanges(ctx context.Context, repoDir, filePath string) error {
	const expectedMsg = "User changes from diff view."

	// run executes git in repoDir and attaches git's output to any failure so
	// the caller sees more than a bare exit status.
	run := func(args ...string) error {
		cmd := exec.CommandContext(ctx, "git", args...)
		cmd.Dir = repoDir
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("%w - %s", err, strings.TrimSpace(string(out)))
		}
		return nil
	}

	// Check if the last commit has the expected message.
	logCmd := exec.CommandContext(ctx, "git", "log", "-1", "--pretty=%s")
	logCmd.Dir = repoDir
	output, err := logCmd.Output()
	amend := err == nil && strings.TrimSpace(string(output)) == expectedMsg

	// Add the file to git. "--" keeps a path that starts with '-' from being
	// parsed as an option.
	if err := run("add", "--", filePath); err != nil {
		return fmt.Errorf("error adding file to git: %w", err)
	}

	// Commit the changes, amending the previous diff-view commit if there is
	// one. The explicit path restricts both variants to filePath.
	commitArgs := []string{"commit", "-m", expectedMsg}
	if amend {
		commitArgs = []string{"commit", "--amend", "--no-edit"}
	}
	commitArgs = append(commitArgs, "--", filePath)

	if err := run(commitArgs...); err != nil {
		return fmt.Errorf("error committing changes: %w", err)
	}

	return nil
}