blob: b40396a692874b087fd3025b1c707f83fd31c6b6 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "go/parser"
10 "go/token"
11 "log/slog"
12 "os"
13 "path/filepath"
14 "strings"
15
Josh Bleecher Snyder3dd3e412025-07-22 20:32:03 -070016 "github.com/pkg/diff"
Earl Lee2e463fb2025-04-17 11:22:22 -070017 "sketch.dev/claudetool/editbuf"
18 "sketch.dev/claudetool/patchkit"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070019 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070020)
21
// PatchCallback defines the signature for patch tool callbacks.
//
// A callback is invoked once per patch tool call, after the patch tool has
// executed (successfully or not). It receives the decoded patch input and
// the tool output that the run produced, and returns the tool output that
// should be reported instead; returning output unchanged is a no-op.
type PatchCallback func(input PatchInput, output llm.ToolOut) llm.ToolOut
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +000027
// PatchTool specifies an llm.Tool for patching files.
// Use the Tool method to obtain the llm.Tool itself.
type PatchTool struct {
	// Callback, when non-nil, is called after every patch run with the
	// decoded input and the run's output; its return value replaces the
	// output. May be nil.
	Callback PatchCallback // may be nil
}
32
33// Tool returns an llm.Tool based on p.
34func (p *PatchTool) Tool() *llm.Tool {
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +000035 return &llm.Tool{
36 Name: PatchName,
37 Description: strings.TrimSpace(PatchDescription),
38 InputSchema: llm.MustSchema(PatchInputSchema),
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070039 Run: func(ctx context.Context, m json.RawMessage) llm.ToolOut {
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +000040 var input PatchInput
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070041 output := patchRun(ctx, m, &input)
Josh Bleecher Snyder04f16a52025-07-30 11:46:25 -070042 if p.Callback != nil {
43 return p.Callback(input, output)
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +000044 }
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070045 return output
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +000046 },
47 }
Earl Lee2e463fb2025-04-17 11:22:22 -070048}
49
const (
	// PatchName is the name under which the patch tool is registered.
	PatchName = "patch"

	// PatchDescription is the tool description presented to the model.
	// Surrounding whitespace is trimmed before use.
	PatchDescription = `
File modification tool for precise text edits.

Operations:
- replace: Substitute text with new content
- append_eof: Append new text at the end of the file
- prepend_bof: Insert new text at the beginning of the file
- overwrite: Replace the entire file with new content (automatically creates the file)

Usage notes:
- All inputs are interpreted literally (no automatic newline or whitespace handling)
- For replace operations, oldText must appear EXACTLY ONCE in the file
`

	// PatchInputSchema is the JSON schema for the patch tool's input.
	// Its shape mirrors PatchInput and PatchRequest.
	//
	// If you modify this, update the termui template for prettier rendering.
	PatchInputSchema = `
{
  "type": "object",
  "required": ["path", "patches"],
  "properties": {
    "path": {
      "type": "string",
      "description": "Absolute path to the file to patch"
    },
    "patches": {
      "type": "array",
      "description": "List of patch requests to apply",
      "items": {
        "type": "object",
        "required": ["operation", "newText"],
        "properties": {
          "operation": {
            "type": "string",
            "enum": ["replace", "append_eof", "prepend_bof", "overwrite"],
            "description": "Type of operation to perform"
          },
          "oldText": {
            "type": "string",
            "description": "Text to locate for the operation (must be unique in file, required for replace)"
          },
          "newText": {
            "type": "string",
            "description": "The new text to use (empty for deletions)"
          }
        }
      }
    }
  }
}
`
)
103
104// TODO: maybe rename PatchRequest to PatchOperation or PatchSpec or PatchPart or just Patch?
105
// PatchInput represents the input structure for patch operations.
// It is the Go counterpart of PatchInputSchema.
type PatchInput struct {
	// Path is the file to patch. patchRun rejects non-absolute paths.
	Path string `json:"path"`
	// Patches are the edits to apply to the file at Path.
	// patchRun rejects an empty list.
	Patches []PatchRequest `json:"patches"`
}
111
// PatchRequest represents a single patch operation.
type PatchRequest struct {
	// Operation selects the kind of edit: "replace", "append_eof",
	// "prepend_bof", or "overwrite". Anything else is rejected by patchRun.
	Operation string `json:"operation"`
	// OldText is the text to locate in the file.
	// It must be non-empty for "replace"; the other operations do not read it.
	OldText string `json:"oldText,omitempty"`
	// NewText is the text to insert, or to substitute for OldText.
	NewText string `json:"newText,omitempty"`
}
118
Josh Bleecher Snyder238c18f2025-06-30 22:26:54 +0000119// patchRun implements the guts of the patch tool.
120// It populates input from m.
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700121func patchRun(ctx context.Context, m json.RawMessage, input *PatchInput) llm.ToolOut {
Earl Lee2e463fb2025-04-17 11:22:22 -0700122 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700123 return llm.ErrorfToolOut("failed to unmarshal user_patch input: %w", err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700124 }
125
126 // Validate the input
127 if !filepath.IsAbs(input.Path) {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700128 return llm.ErrorfToolOut("path %q is not absolute", input.Path)
Earl Lee2e463fb2025-04-17 11:22:22 -0700129 }
130 if len(input.Patches) == 0 {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700131 return llm.ErrorToolOut(fmt.Errorf("no patches provided"))
Earl Lee2e463fb2025-04-17 11:22:22 -0700132 }
133 // TODO: check whether the file is autogenerated, and if so, require a "force" flag to modify it.
134
135 orig, err := os.ReadFile(input.Path)
136 // If the file doesn't exist, we can still apply patches
137 // that don't require finding existing text.
138 switch {
139 case errors.Is(err, os.ErrNotExist):
140 for _, patch := range input.Patches {
141 switch patch.Operation {
142 case "prepend_bof", "append_eof", "overwrite":
143 default:
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700144 return llm.ErrorfToolOut("file %q does not exist", input.Path)
Earl Lee2e463fb2025-04-17 11:22:22 -0700145 }
146 }
147 case err != nil:
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700148 return llm.ErrorfToolOut("failed to read file %q: %w", input.Path, err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700149 }
150
151 likelyGoFile := strings.HasSuffix(input.Path, ".go")
152
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000153 autogenerated := likelyGoFile && IsAutogeneratedGoFile(orig)
Earl Lee2e463fb2025-04-17 11:22:22 -0700154 parsed := likelyGoFile && parseGo(orig) != nil
155
156 origStr := string(orig)
157 // Process the patches "simultaneously", minimizing them along the way.
158 // Claude generates patches that interact with each other.
159 buf := editbuf.NewBuffer(orig)
160
161 // TODO: is it better to apply the patches that apply cleanly and report on the failures?
162 // or instead have it be all-or-nothing?
163 // For now, it is all-or-nothing.
164 // TODO: when the model gets into a "cannot apply patch" cycle of doom, how do we get it unstuck?
165 // Also: how do we detect that it's in a cycle?
166 var patchErr error
167 for i, patch := range input.Patches {
168 switch patch.Operation {
169 case "prepend_bof":
170 buf.Insert(0, patch.NewText)
171 case "append_eof":
172 buf.Insert(len(orig), patch.NewText)
173 case "overwrite":
174 buf.Replace(0, len(orig), patch.NewText)
175 case "replace":
176 if patch.OldText == "" {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700177 return llm.ErrorfToolOut("patch %d: oldText cannot be empty for %s operation", i, patch.Operation)
Earl Lee2e463fb2025-04-17 11:22:22 -0700178 }
179
180 // Attempt to apply the patch.
181 spec, count := patchkit.Unique(origStr, patch.OldText, patch.NewText)
182 switch count {
183 case 0:
184 // no matches, maybe recoverable, continued below
185 case 1:
186 // exact match, apply
187 slog.DebugContext(ctx, "patch_applied", "method", "unique")
188 spec.ApplyToEditBuf(buf)
189 continue
190 case 2:
191 // multiple matches
192 patchErr = errors.Join(patchErr, fmt.Errorf("old text not unique:\n%s", patch.OldText))
193 default:
194 // TODO: return an error instead of using agentPatch
195 slog.ErrorContext(ctx, "unique returned unexpected count", "count", count)
196 patchErr = errors.Join(patchErr, fmt.Errorf("internal error"))
197 continue
198 }
199
200 // The following recovery mechanisms are heuristic.
201 // They aren't perfect, but they appear safe,
202 // and the cases they cover appear with some regularity.
203
204 // Try adjusting the whitespace prefix.
205 spec, ok := patchkit.UniqueDedent(origStr, patch.OldText, patch.NewText)
206 if ok {
207 slog.DebugContext(ctx, "patch_applied", "method", "unique_dedent")
208 spec.ApplyToEditBuf(buf)
209 continue
210 }
211
212 // Try ignoring leading/trailing whitespace in a semantically safe way.
213 spec, ok = patchkit.UniqueInValidGo(origStr, patch.OldText, patch.NewText)
214 if ok {
215 slog.DebugContext(ctx, "patch_applied", "method", "unique_in_valid_go")
216 spec.ApplyToEditBuf(buf)
217 continue
218 }
219
220 // Try ignoring semantically insignificant whitespace.
221 spec, ok = patchkit.UniqueGoTokens(origStr, patch.OldText, patch.NewText)
222 if ok {
223 slog.DebugContext(ctx, "patch_applied", "method", "unique_go_tokens")
224 spec.ApplyToEditBuf(buf)
225 continue
226 }
227
228 // Try trimming the first line of the patch, if we can do so safely.
229 spec, ok = patchkit.UniqueTrim(origStr, patch.OldText, patch.NewText)
230 if ok {
231 slog.DebugContext(ctx, "patch_applied", "method", "unique_trim")
232 spec.ApplyToEditBuf(buf)
233 continue
234 }
235
236 // No dice.
237 patchErr = errors.Join(patchErr, fmt.Errorf("old text not found:\n%s", patch.OldText))
238 continue
239 default:
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700240 return llm.ErrorfToolOut("unrecognized operation %q", patch.Operation)
Earl Lee2e463fb2025-04-17 11:22:22 -0700241 }
242 }
243
244 if patchErr != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700245 return llm.ErrorToolOut(patchErr)
Earl Lee2e463fb2025-04-17 11:22:22 -0700246 }
247
248 patched, err := buf.Bytes()
249 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700250 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700251 }
252 if err := os.MkdirAll(filepath.Dir(input.Path), 0o700); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700253 return llm.ErrorfToolOut("failed to create directory %q: %w", filepath.Dir(input.Path), err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700254 }
255 if err := os.WriteFile(input.Path, patched, 0o600); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700256 return llm.ErrorfToolOut("failed to write patched contents to file %q: %w", input.Path, err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700257 }
258
259 response := new(strings.Builder)
260 fmt.Fprintf(response, "- Applied all patches\n")
261
262 if parsed {
263 parseErr := parseGo(patched)
264 if parseErr != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700265 return llm.ErrorfToolOut("after applying all patches, the file no longer parses:\n%w", parseErr)
Earl Lee2e463fb2025-04-17 11:22:22 -0700266 }
267 }
268
269 if autogenerated {
270 fmt.Fprintf(response, "- WARNING: %q appears to be autogenerated. Patches were applied anyway.\n", input.Path)
271 }
272
Josh Bleecher Snyder3dd3e412025-07-22 20:32:03 -0700273 diff := generateUnifiedDiff(input.Path, string(orig), string(patched))
274
Earl Lee2e463fb2025-04-17 11:22:22 -0700275 // TODO: maybe report the patch result to the model, i.e. some/all of the new code after the patches and formatting.
Josh Bleecher Snyder3dd3e412025-07-22 20:32:03 -0700276 return llm.ToolOut{
277 LLMContent: llm.TextContent(response.String()),
278 Display: diff,
279 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700280}
281
// parseGo reports whether buf is syntactically valid Go source.
// It returns nil when buf parses as a Go file and the parser's error otherwise.
// Object resolution is skipped; only syntax is checked.
func parseGo(buf []byte) error {
	if _, err := parser.ParseFile(token.NewFileSet(), "", buf, parser.SkipObjectResolution); err != nil {
		return err
	}
	return nil
}
287
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +0000288// IsAutogeneratedGoFile reports whether a Go file has markers indicating it was autogenerated.
289func IsAutogeneratedGoFile(buf []byte) bool {
Earl Lee2e463fb2025-04-17 11:22:22 -0700290 for _, sig := range autogeneratedSignals {
291 if bytes.Contains(buf, []byte(sig)) {
292 return true
293 }
294 }
295
296 // https://pkg.go.dev/cmd/go#hdr-Generate_Go_files_by_processing_source
297 // "This line must appear before the first non-comment, non-blank text in the file."
298 // Approximate that by looking for it at the top of the file, before the last of the imports.
299 // (Sometimes people put it after the package declaration, because of course they do.)
300 // At least in the imports region we know it's not part of their actual code;
301 // we don't want to ignore the generator (which also includes these strings!),
302 // just the generated code.
303 fset := token.NewFileSet()
304 f, err := parser.ParseFile(fset, "x.go", buf, parser.ImportsOnly|parser.ParseComments)
305 if err == nil {
306 for _, cg := range f.Comments {
307 t := strings.ToLower(cg.Text())
308 for _, sig := range autogeneratedHeaderSignals {
309 if strings.Contains(t, sig) {
310 return true
311 }
312 }
313 }
314 }
315
316 return false
317}
318
// autogeneratedSignals lists byte sequences whose presence anywhere in a file
// marks the file as autogenerated.
var autogeneratedSignals = [][]byte{
	// Top-level helper found in bindata-packed files (the pre-embed way of
	// bundling data). The leading newline pins the match to the start of a line.
	[]byte("\nfunc bindataRead("),
}
323
// autogeneratedHeaderSignals lists substrings that mark a file as
// autogenerated when they occur in a comment at the top of the file.
// Entries must be lowercase: they are matched against lowercased comment text.
//
// The canonical marker would be `(?m)^// Code generated .* DO NOT EDIT\.$`,
// but people screw it up, a lot, so matching is deliberately more lenient.
var autogeneratedHeaderSignals = []string{
	"generate",
	strings.ToLower("DO NOT EDIT"), // written the way it appears in headers
	"export by",
}
Josh Bleecher Snyder3dd3e412025-07-22 20:32:03 -0700332
333func generateUnifiedDiff(filePath, original, patched string) string {
334 buf := new(strings.Builder)
335 err := diff.Text(filePath, filePath, original, patched, buf)
336 if err != nil {
337 return fmt.Sprintf("(diff generation failed: %v)\n", err)
338 }
339 return buf.String()
340}