| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 1 | package loop |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| 5 | "encoding/json" |
| 6 | "fmt" |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 7 | "strings" |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 8 | |
| Josh Bleecher Snyder | f4047bb | 2025-05-05 23:02:56 +0000 | [diff] [blame] | 9 | "sketch.dev/claudetool/codereview" |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 10 | "sketch.dev/experiment" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 11 | "sketch.dev/llm" |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 12 | ) |
| 13 | |
| 14 | // makeDoneTool creates a tool that provides a checklist to the agent. There |
| 15 | // are some duplicative instructions here and in the system prompt, and it's |
| 16 | // not as reliable as it could be. Historically, we've found that Claude ignores |
| 17 | // the tool results here, so we don't tell the tool to say "hey, really check this" |
| 18 | // at the moment, though we've tried. |
| Josh Bleecher Snyder | f4047bb | 2025-05-05 23:02:56 +0000 | [diff] [blame] | 19 | func makeDoneTool(codereview *codereview.CodeReviewer, gitUsername, gitEmail string) *llm.Tool { |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 20 | description := doneDescription |
| 21 | if experiment.Enabled("not_done") { |
| 22 | description = backtrackDoneDescription |
| 23 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 24 | return &llm.Tool{ |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 25 | Name: "done", |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 26 | Description: description, |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 27 | InputSchema: json.RawMessage(doneChecklistJSONSchema(gitUsername, gitEmail)), |
| 28 | Run: func(ctx context.Context, input json.RawMessage) (string, error) { |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 29 | if experiment.Enabled("not_done") { |
| 30 | if strings.Contains(strings.ToLower(string(input)), "cancel done tool call") { |
| 31 | return "", fmt.Errorf("cancelled") |
| 32 | } |
| 33 | m := make(map[string]struct { |
| 34 | Status string |
| 35 | }) |
| 36 | if err := json.Unmarshal(input, &m); err != nil { |
| 37 | return "", err |
| 38 | } |
| 39 | for _, checklist := range m { |
| 40 | if checklist.Status == "cancel" { |
| 41 | return "", fmt.Errorf("cancelled") |
| 42 | } |
| 43 | } |
| 44 | } |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 45 | // Cannot be done with a messy git. |
| 46 | if err := codereview.RequireNormalGitState(ctx); err != nil { |
| 47 | return "", err |
| 48 | } |
| 49 | if err := codereview.RequireNoUncommittedChanges(ctx); err != nil { |
| 50 | return "", err |
| 51 | } |
| 52 | // Ensure that the current commit has been reviewed. |
| 53 | head, err := codereview.CurrentCommit(ctx) |
| 54 | if err == nil { |
| 55 | needsReview := !codereview.IsInitialCommit(head) && !codereview.HasReviewed(head) |
| 56 | if needsReview { |
| 57 | return "", fmt.Errorf("codereview tool has not been run for commit %v", head) |
| 58 | } |
| 59 | } |
| 60 | return `Please ask the user to review your work. Be concise - users are more likely to read shorter comments.`, nil |
| 61 | }, |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | func doneChecklistJSONSchema(gitUsername, gitEmail string) string { |
| Josh Bleecher Snyder | 75ec6bb | 2025-04-24 10:49:16 -0700 | [diff] [blame] | 66 | gitCommitDescription := fmt.Sprintf(`Create git commits for any code changes you made. Match the style of recent commit messages. Include 'Co-Authored-By: sketch <hello@sketch.dev>' and the original user prompt. Use GIT_AUTHOR_NAME="%s" GIT_AUTHOR_EMAIL="%s" (not git config).`, |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 67 | gitUsername, gitEmail) |
| 68 | desc, err := json.Marshal(gitCommitDescription) |
| 69 | if err != nil { |
| 70 | panic(err) |
| 71 | } |
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 72 | prefix := doneChecklistJSONSchemaPrefix |
| 73 | suffix := doneChecklistJSONSchemaSuffix |
| 74 | if experiment.Enabled("not_done") { |
| 75 | prefix = backtrackDoneChecklistJSONSchemaPrefix |
| 76 | suffix = backtrackDoneChecklistJSONSchemaSuffix |
| 77 | } |
| 78 | return prefix + string(desc) + suffix |
| Earl Lee | 2e463fb | 2025-04-17 11:22:22 -0700 | [diff] [blame] | 79 | } |
| 80 | |
// TODO: splitting the schema into string halves is ugly; JSON-encoding a deeply
// nested map[string]any instead would work too, but is also ugly.

// doneDescription is the done tool's description.
const doneDescription = `Use this tool when you have achieved the user's goal. The parameters form a checklist which you should evaluate.`

// doneChecklistJSONSchemaPrefix is the part of the done tool's input schema
// that comes before the JSON-encoded git_commit description. It ends right
// after the "description" key of the git_commit item, so on its own it is not
// valid JSON.
const doneChecklistJSONSchemaPrefix = `{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Checklist",
"description": "A schema for tracking checklist items with status and comments",
"type": "object",
"required": ["checklist_items"],
"properties": {
"checklist_items": {
"type": "object",
"description": "Collection of checklist items",
"properties": {
"wrote_tests": {
"$ref": "#/definitions/checklistItem",
"description": "If code was changed, tests were written or updated."
},
"passes_tests": {
"$ref": "#/definitions/checklistItem",
"description": "If any commits were made, tests pass."
},
"code_reviewed": {
"$ref": "#/definitions/checklistItem",
"description": "If any commits were made, the codereview tool was run and its output was addressed."
},
"git_commit": {
"$ref": "#/definitions/checklistItem",
"description": `

// doneChecklistJSONSchemaSuffix is the part of the done tool's input schema
// that comes after the JSON-encoded git_commit description. It closes the
// git_commit item and the surrounding objects, and defines checklistItem.
const doneChecklistJSONSchemaSuffix = `
}
},
"additionalProperties": {
"$ref": "#/definitions/checklistItem"
}
}
},
"definitions": {
"checklistItem": {
"type": "object",
"required": ["status"],
"properties": {
"status": {
"type": "string",
"description": "Current status of the checklist item",
"enum": ["yes", "no", "not applicable", "other"]
},
"description": {
"type": "string",
"description": "Description of what this checklist item verifies"
},
"comments": {
"type": "string",
"description": "Additional comments or context for this checklist item"
}
}
}
}
}`
| Josh Bleecher Snyder | 503b5e3 | 2025-05-05 13:30:55 -0700 | [diff] [blame] | 142 | |
// backtrackDoneDescription is the done tool's description when the "not_done"
// experiment is enabled. It tells the agent how to cancel a done call.
//
// NOTE(review): the text mentions the status "not applicable", but the status
// enum in backtrackDoneChecklistJSONSchemaSuffix spells it "n/a" - confirm
// whether the two should agree.
const backtrackDoneDescription = `This tool marks task completion. Review the checklist items carefully - if any item's status is "cancel" or any thoughts contain "Cancel done tool call", the entire call will be ignored without user notification. Cancellation is free and preferred over inaccurately marking items as "done" or "not applicable".`

// backtrackDoneChecklistJSONSchemaPrefix is the part of the backtracking input
// schema that comes before the JSON-encoded git_commit description. It ends
// right after the "description" key of the git_commit item, so on its own it
// is not valid JSON.
const backtrackDoneChecklistJSONSchemaPrefix = `{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Checklist",
"description": "A schema for tracking checklist items",
"type": "object",
"required": ["checklist_items"],
"properties": {
"checklist_items": {
"type": "object",
"description": "Collection of checklist items",
"properties": {
"wrote_tests": {
"$ref": "#/definitions/checklistItem",
"description": "If code was changed, tests were written or updated."
},
"passes_tests": {
"$ref": "#/definitions/checklistItem",
"description": "If any commits were made, tests pass."
},
"code_reviewed": {
"$ref": "#/definitions/checklistItem",
"description": "If any commits were made, the codereview tool was run and its output was addressed."
},
"git_commit": {
"$ref": "#/definitions/checklistItem",
"description": `

// backtrackDoneChecklistJSONSchemaSuffix is the part of the backtracking input
// schema that comes after the JSON-encoded git_commit description. It closes
// the git_commit item and the surrounding objects, and defines checklistItem
// with required "thoughts" and "status" fields.
const backtrackDoneChecklistJSONSchemaSuffix = `
}
},
"additionalProperties": {
"$ref": "#/definitions/checklistItem"
}
}
},
"definitions": {
"checklistItem": {
"type": "object",
"required": ["thoughts", "status"],
"properties": {
"thoughts": {
"type": "string",
"description": "Reflections on this item's status - be honest about any issues"
},
"status": {
"type": "string",
"description": "Current status - when in doubt, cancel",
"enum": ["done", "cancel", "n/a"]
}
}
}
}
}`