blob: 18a241d9183b579c14e1057b1eebe9bd37509ced [file] [log] [blame]
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00001// Package git_tools provides utilities for interacting with Git repositories.
2package git_tools
3
4import (
5 "bufio"
Philip Zeyliger75bd37d2025-05-22 18:49:14 +00006 "context"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00007 "fmt"
Philip Zeyliger272a90e2025-05-16 14:49:51 -07008 "os"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +00009 "os/exec"
Philip Zeyliger272a90e2025-05-16 14:49:51 -070010 "path/filepath"
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000011 "strings"
12)
13
// DiffFile describes a single changed file reported by a Git diff.
type DiffFile struct {
	// Path is the path of the file; for renames and copies it is the new path.
	Path string `json:"path"`
	// OldPath is the original path for renames and copies, empty otherwise.
	OldPath string `json:"old_path"`
	// OldMode is the file mode before the change.
	OldMode string `json:"old_mode"`
	// NewMode is the file mode after the change.
	NewMode string `json:"new_mode"`
	// OldHash is the blob hash before the change.
	OldHash string `json:"old_hash"`
	// NewHash is the blob hash after the change.
	NewHash string `json:"new_hash"`
	// Status is the change type: A=added, M=modified, D=deleted, R=renamed, C=copied.
	Status string `json:"status"`
	// Additions is the number of lines added.
	Additions int `json:"additions"`
	// Deletions is the number of lines deleted.
	Deletions int `json:"deletions"`
}
26
27// GitRawDiff returns a structured representation of the Git diff between two commits or references
Philip Zeyliger272a90e2025-05-16 14:49:51 -070028// If 'to' is empty, it will show unstaged changes (diff with working directory)
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000029func GitRawDiff(repoDir, from, to string) ([]DiffFile, error) {
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000030 // Git command to generate the diff in raw format with full hashes and rename/copy detection
31 // --find-copies-harder enables more aggressive copy detection
Philip Zeyligere89b3082025-05-29 03:16:06 +000032 var rawCmd, numstatCmd *exec.Cmd
Philip Zeyliger272a90e2025-05-16 14:49:51 -070033 if to == "" {
34 // If 'to' is empty, show unstaged changes
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000035 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from)
Philip Zeyligere89b3082025-05-29 03:16:06 +000036 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070037 } else {
38 // Normal diff between two refs
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +000039 rawCmd = exec.Command("git", "-C", repoDir, "diff", "--raw", "--abbrev=40", "-M", "-C", "--find-copies-harder", from, to)
Philip Zeyligere89b3082025-05-29 03:16:06 +000040 numstatCmd = exec.Command("git", "-C", repoDir, "diff", "--numstat", from, to)
Philip Zeyliger272a90e2025-05-16 14:49:51 -070041 }
42
Philip Zeyligere89b3082025-05-29 03:16:06 +000043 // Execute raw diff command
44 rawOut, err := rawCmd.CombinedOutput()
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000045 if err != nil {
Philip Zeyligere89b3082025-05-29 03:16:06 +000046 return nil, fmt.Errorf("error executing git diff --raw: %w - %s", err, string(rawOut))
47 }
48
49 // Execute numstat command
50 numstatOut, err := numstatCmd.CombinedOutput()
51 if err != nil {
52 return nil, fmt.Errorf("error executing git diff --numstat: %w - %s", err, string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000053 }
54
55 // Parse the raw diff output into structured format
Philip Zeyligere89b3082025-05-29 03:16:06 +000056 return parseRawDiffWithNumstat(string(rawOut), string(numstatOut))
Philip Zeyligerd3ac1122025-05-14 02:54:18 +000057}
58
// GitShow returns the output of "git show" for the given commit hash in the
// repository at repoDir.
//
// Only git's standard output is returned. Standard error is captured
// separately so that warnings cannot leak into the shown content; on failure
// it is included in the returned error, which wraps the exec error.
func GitShow(repoDir, hash string) (string, error) {
	cmd := exec.Command("git", "-C", repoDir, "show", hash)

	var stderr strings.Builder
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("error executing git show: %w - %s", err, stderr.String())
	}
	return string(out), nil
}
68
Philip Zeyligere89b3082025-05-29 03:16:06 +000069// parseRawDiffWithNumstat converts git diff --raw and --numstat output into structured format
70func parseRawDiffWithNumstat(rawOutput, numstatOutput string) ([]DiffFile, error) {
71 // First parse the raw diff to get the base file information
72 files, err := parseRawDiff(rawOutput)
73 if err != nil {
74 return nil, err
75 }
76
77 // Create a map to store numstat data by file path
78 numstatMap := make(map[string]struct{ additions, deletions int })
79
80 // Parse numstat output
81 if numstatOutput != "" {
82 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(numstatOutput)))
83 for scanner.Scan() {
84 line := scanner.Text()
85 // Format: additions\tdeletions\tfilename
86 // Example: 5\t3\tpath/to/file.go
87 parts := strings.Split(line, "\t")
88 if len(parts) >= 3 {
89 additions := 0
90 deletions := 0
91
92 // Handle binary files (marked with "-")
93 if parts[0] != "-" {
94 if add, err := fmt.Sscanf(parts[0], "%d", &additions); err != nil || add != 1 {
95 additions = 0
96 }
97 }
98 if parts[1] != "-" {
99 if del, err := fmt.Sscanf(parts[1], "%d", &deletions); err != nil || del != 1 {
100 deletions = 0
101 }
102 }
103
104 filePath := strings.Join(parts[2:], "\t") // Handle filenames with tabs
105 numstatMap[filePath] = struct{ additions, deletions int }{additions, deletions}
106 }
107 }
108 }
109
110 // Merge numstat data into files
111 for i := range files {
112 if stats, found := numstatMap[files[i].Path]; found {
113 files[i].Additions = stats.additions
114 files[i].Deletions = stats.deletions
115 }
116 }
117
118 return files, nil
119}
120
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000121// parseRawDiff converts git diff --raw output into structured format
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000122// Handles both regular changes and rename/copy operations
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000123func parseRawDiff(diffOutput string) ([]DiffFile, error) {
124 var files []DiffFile
125 if diffOutput == "" {
126 return files, nil
127 }
128
129 // Process diff output line by line
130 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(diffOutput)))
131 for scanner.Scan() {
132 line := scanner.Text()
133 // Format: :oldmode newmode oldhash newhash status\tpath
134 // Example: :000000 100644 0000000000000000000000000000000000000000 6b33680ae6de90edd5f627c84147f7a41aa9d9cf A git_tools/git_tools.go
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000135 // For renames: :100644 100644 oldHash newHash R100\told_path\tnew_path
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000136 if !strings.HasPrefix(line, ":") {
137 continue
138 }
139
140 parts := strings.Fields(line[1:]) // Skip the leading colon
141 if len(parts) < 5 {
142 continue // Not enough parts, skip this line
143 }
144
145 oldMode := parts[0]
146 newMode := parts[1]
147 oldHash := parts[2]
148 newHash := parts[3]
149 status := parts[4]
150
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000151 // Find the tab after the status field
152 tabIndex := strings.Index(line, "\t")
153 if tabIndex == -1 {
154 continue // No tab found, malformed line
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000155 }
156
Josh Bleecher Snyderbcc1c412025-05-29 00:36:49 +0000157 // Extract paths after the tab
158 pathPart := line[tabIndex+1:]
159
160 // Handle rename/copy operations (status starts with R or C)
161 if strings.HasPrefix(status, "R") || strings.HasPrefix(status, "C") {
162 // For renames/copies, the path part contains: old_path\tnew_path
163 pathParts := strings.Split(pathPart, "\t")
164 if len(pathParts) == 2 {
165 // Preserve rename/copy as a single entry with both paths
166 oldPath := pathParts[0]
167 newPath := pathParts[1]
168
169 files = append(files, DiffFile{
170 Path: newPath, // New path as primary path
171 OldPath: oldPath, // Original path for rename/copy
172 OldMode: oldMode,
173 NewMode: newMode,
174 OldHash: oldHash,
175 NewHash: newHash,
176 Status: status, // Preserve original R* or C* status
177 })
178 } else {
179 // Malformed rename, treat as regular change
180 files = append(files, DiffFile{
181 Path: pathPart,
182 OldPath: "",
183 OldMode: oldMode,
184 NewMode: newMode,
185 OldHash: oldHash,
186 NewHash: newHash,
187 Status: status,
188 })
189 }
190 } else {
191 // Regular change (A, M, D)
192 files = append(files, DiffFile{
193 Path: pathPart,
194 OldPath: "", // No old path for regular changes
195 OldMode: oldMode,
196 NewMode: newMode,
197 OldHash: oldHash,
198 NewHash: newHash,
199 Status: status,
200 })
201 }
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000202 }
203
204 return files, nil
205}
206
// GitLogEntry is one commit as reported by git log.
type GitLogEntry struct {
	// Hash is the full commit hash.
	Hash string `json:"hash"`
	// Refs lists the references (branches, tags) pointing to this commit.
	Refs []string `json:"refs"`
	// Subject is the commit subject/message.
	Subject string `json:"subject"`
}
213
214// GitRecentLog returns the recent commit log between the initial commit and HEAD
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700215func GitRecentLog(repoDir string, initialCommitHash string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000216 // Validate input
217 if initialCommitHash == "" {
218 return nil, fmt.Errorf("initial commit hash must be provided")
219 }
220
221 // Find merge-base of HEAD and initial commit
222 cmdMergeBase := exec.Command("git", "-C", repoDir, "merge-base", "HEAD", initialCommitHash)
223 mergeBase, err := cmdMergeBase.CombinedOutput()
224 if err != nil {
225 // If merge-base fails (which can happen in simple repos), use initialCommitHash
226 return getGitLog(repoDir, initialCommitHash)
227 }
228
229 mergeBaseHash := strings.TrimSpace(string(mergeBase))
230 if mergeBaseHash == "" {
231 // If merge-base doesn't return a valid hash, use initialCommitHash
232 return getGitLog(repoDir, initialCommitHash)
233 }
234
235 // Use the merge-base as the 'from' point
236 return getGitLog(repoDir, mergeBaseHash)
237}
238
239// getGitLog gets the git log with the specified format using the provided fromCommit
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700240func getGitLog(repoDir string, fromCommit string) ([]GitLogEntry, error) {
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000241 // Check if fromCommit~10 exists (10 commits before fromCommit)
242 checkCmd := exec.Command("git", "-C", repoDir, "rev-parse", "--verify", fromCommit+"~10")
243 if err := checkCmd.Run(); err != nil {
244 // If fromCommit~10 doesn't exist, use just fromCommit..HEAD as the range
245 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"..HEAD")
246 out, err := cmd.CombinedOutput()
247 if err != nil {
248 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
249 }
250 return parseGitLog(string(out))
251 }
252
253 // Use fromCommit~10..HEAD range with the specified format for easy parsing
254 cmd := exec.Command("git", "-C", repoDir, "log", "-n", "1000", "--oneline", "--decorate", "--pretty=%H%x00%s%x00%d", fromCommit+"~10..HEAD")
255 out, err := cmd.CombinedOutput()
256 if err != nil {
257 return nil, fmt.Errorf("error executing git log: %w - %s", err, string(out))
258 }
259
260 return parseGitLog(string(out))
261}
262
263// parseGitLog parses the output of git log with null-separated fields
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700264func parseGitLog(logOutput string) ([]GitLogEntry, error) {
265 var entries []GitLogEntry
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000266 if logOutput == "" {
267 return entries, nil
268 }
269
270 scanner := bufio.NewScanner(strings.NewReader(strings.TrimSpace(logOutput)))
271 for scanner.Scan() {
272 line := scanner.Text()
273 parts := strings.Split(line, "\x00")
274 if len(parts) != 3 {
275 continue // Skip malformed lines
276 }
277
278 hash := parts[0]
279 subject := parts[1]
280 decoration := parts[2]
281
282 // Parse the refs from the decoration
283 refs := parseRefs(decoration)
284
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700285 entries = append(entries, GitLogEntry{
Philip Zeyligerd3ac1122025-05-14 02:54:18 +0000286 Hash: hash,
287 Refs: refs,
288 Subject: subject,
289 })
290 }
291
292 return entries, nil
293}
294
// parseRefs turns a git decoration string, as produced by the %d log
// placeholder, into a list of reference names.
//
// Example input: " (HEAD -> main, origin/main, tag: v1.0.0)", which yields
// ["main", "origin/main", "v1.0.0"]. The surrounding parentheses are removed,
// the remainder is split on ", ", and the "HEAD -> " and "tag: " prefixes are
// stripped from individual entries. A bare "HEAD" is kept as is. The result is
// nil when the decoration contains no references.
func parseRefs(decoration string) []string {
	inner := strings.TrimSpace(decoration)
	inner = strings.TrimPrefix(inner, "(")
	inner = strings.TrimSuffix(inner, ")")
	inner = strings.TrimSuffix(inner, ") ")
	if inner == "" {
		return nil
	}

	var refs []string
	for _, item := range strings.Split(inner, ", ") {
		name := strings.TrimSpace(item)
		switch {
		case name == "":
			// Nothing between two separators.
			continue
		case strings.HasPrefix(name, "HEAD -> "):
			// "HEAD -> branch": keep only the branch name.
			name = name[len("HEAD -> "):]
		case strings.HasPrefix(name, "tag: "):
			// "tag: name": keep only the tag name.
			name = name[len("tag: "):]
		}
		refs = append(refs, name)
	}

	return refs
}
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700348
// validateRepoPath verifies that filePath names a file tracked by git in the
// repository at repoDir and that it lies within the repository directory.
// It returns repoDir joined with filePath when both checks pass.
//
// The tracked-file check uses "git ls-files --error-unmatch". filePath is
// passed after "--" and with --literal-pathspecs so it is always taken as a
// literal path: it cannot be interpreted as a command-line option, a glob, or
// pathspec magic that happens to match some other tracked file.
//
// NOTE(review): the containment check is lexical only. A tracked symlink that
// points outside the repository would still pass — confirm whether callers
// need symlinks resolved.
func validateRepoPath(repoDir, filePath string) (string, error) {
	// First verify that the requested file is tracked by git to prevent
	// access to files outside the repository.
	cmd := exec.Command("git", "-C", repoDir, "--literal-pathspecs", "ls-files", "--error-unmatch", "--", filePath)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("file not tracked by git or outside repository: %s", filePath)
	}

	fullPath := filepath.Join(repoDir, filePath)

	// Validate that the resolved path is still within the repository directory
	// to prevent directory traversal attacks (e.g., ../../../etc/passwd).
	absRepoDir, err := filepath.Abs(repoDir)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute repository path: %w", err)
	}

	absFilePath, err := filepath.Abs(fullPath)
	if err != nil {
		return "", fmt.Errorf("unable to resolve absolute file path: %w", err)
	}

	// The path relative to the repository must be a real descendant: not the
	// repository directory itself and not anything reached through "..".
	// Using Rel instead of a string prefix also works when the repository is
	// the filesystem root.
	rel, err := filepath.Rel(absRepoDir, absFilePath)
	if err != nil || rel == "." || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("file path outside repository: %s", filePath)
	}

	return fullPath, nil
}
381
382// GitCat returns the contents of a file in the repository at the given path
383// This is used to get the current working copy of a file (not using git show)
384func GitCat(repoDir, filePath string) (string, error) {
385 fullPath, err := validateRepoPath(repoDir, filePath)
386 if err != nil {
387 return "", err
388 }
389
390 // Read the file
391 content, err := os.ReadFile(fullPath)
392 if err != nil {
393 return "", fmt.Errorf("error reading file %s: %w", filePath, err)
394 }
395
396 return string(content), nil
397}
398
399// GitSaveFile saves content to a file in the repository, checking first that it's tracked by git
400// This prevents writing to files outside the repository
401func GitSaveFile(repoDir, filePath, content string) error {
402 fullPath, err := validateRepoPath(repoDir, filePath)
403 if err != nil {
404 return err
405 }
406
407 // Write the content to the file
Autoformatter8c463622025-05-16 21:54:17 +0000408 err = os.WriteFile(fullPath, []byte(content), 0o644)
Philip Zeyliger272a90e2025-05-16 14:49:51 -0700409 if err != nil {
410 return fmt.Errorf("error writing to file %s: %w", filePath, err)
411 }
412
413 return nil
414}
Philip Zeyliger75bd37d2025-05-22 18:49:14 +0000415
// AutoCommitDiffViewChanges stages filePath in the repository at repoDir and
// commits it.
//
// If the subject of the last commit is exactly "User changes from diff view.",
// that commit is amended; otherwise a new commit with that message is created.
// A failure to read the last commit (for example in a repository without
// commits) simply selects the new-commit path.
//
// All git commands run with ctx and are cancelled with it. Errors from
// "git add" and "git commit" wrap the exec error and include git's output.
//
// NOTE(review): the amend path runs "git commit --amend --no-edit" without a
// path, so it also picks up anything else that is already staged, whereas the
// new-commit path is limited to filePath — confirm that this is intended.
func AutoCommitDiffViewChanges(ctx context.Context, repoDir, filePath string) error {
	const expectedMsg = "User changes from diff view."

	// run executes git with args in repoDir and returns its combined output
	// for use in error messages.
	run := func(args ...string) (string, error) {
		cmd := exec.CommandContext(ctx, "git", args...)
		cmd.Dir = repoDir
		out, err := cmd.CombinedOutput()
		return strings.TrimSpace(string(out)), err
	}

	// Check if the last commit has the expected message. Only stdout is read
	// so the comparison sees the subject alone.
	logCmd := exec.CommandContext(ctx, "git", "log", "-1", "--pretty=%s")
	logCmd.Dir = repoDir
	subject, err := logCmd.Output()
	amend := err == nil && strings.TrimSpace(string(subject)) == expectedMsg

	// "--" keeps a path that starts with "-" from being read as an option.
	if out, err := run("add", "--", filePath); err != nil {
		return fmt.Errorf("error adding file to git: %w - %s", err, out)
	}

	commitArgs := []string{"commit", "-m", expectedMsg, "--", filePath}
	if amend {
		commitArgs = []string{"commit", "--amend", "--no-edit"}
	}
	if out, err := run(commitArgs...); err != nil {
		return fmt.Errorf("error committing changes: %w - %s", err, out)
	}

	return nil
}