blob: 48818d6f966bd8412319699690e18a060c04752c [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -07007 "strings"
Earl Lee2e463fb2025-04-17 11:22:22 -07008
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00009 "sketch.dev/claudetool/codereview"
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070010 "sketch.dev/experiment"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070011 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070012)
13
14// makeDoneTool creates a tool that provides a checklist to the agent. There
15// are some duplicative instructions here and in the system prompt, and it's
16// not as reliable as it could be. Historically, we've found that Claude ignores
17// the tool results here, so we don't tell the tool to say "hey, really check this"
18// at the moment, though we've tried.
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000019func makeDoneTool(codereview *codereview.CodeReviewer, gitUsername, gitEmail string) *llm.Tool {
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070020 description := doneDescription
21 if experiment.Enabled("not_done") {
22 description = backtrackDoneDescription
23 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070024 return &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070025 Name: "done",
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070026 Description: description,
Earl Lee2e463fb2025-04-17 11:22:22 -070027 InputSchema: json.RawMessage(doneChecklistJSONSchema(gitUsername, gitEmail)),
28 Run: func(ctx context.Context, input json.RawMessage) (string, error) {
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070029 if experiment.Enabled("not_done") {
30 if strings.Contains(strings.ToLower(string(input)), "cancel done tool call") {
31 return "", fmt.Errorf("cancelled")
32 }
33 m := make(map[string]struct {
34 Status string
35 })
36 if err := json.Unmarshal(input, &m); err != nil {
37 return "", err
38 }
39 for _, checklist := range m {
40 if checklist.Status == "cancel" {
41 return "", fmt.Errorf("cancelled")
42 }
43 }
44 }
Earl Lee2e463fb2025-04-17 11:22:22 -070045 // Cannot be done with a messy git.
46 if err := codereview.RequireNormalGitState(ctx); err != nil {
47 return "", err
48 }
49 if err := codereview.RequireNoUncommittedChanges(ctx); err != nil {
50 return "", err
51 }
52 // Ensure that the current commit has been reviewed.
53 head, err := codereview.CurrentCommit(ctx)
54 if err == nil {
55 needsReview := !codereview.IsInitialCommit(head) && !codereview.HasReviewed(head)
56 if needsReview {
57 return "", fmt.Errorf("codereview tool has not been run for commit %v", head)
58 }
59 }
60 return `Please ask the user to review your work. Be concise - users are more likely to read shorter comments.`, nil
61 },
62 }
63}
64
65func doneChecklistJSONSchema(gitUsername, gitEmail string) string {
Josh Bleecher Snyder75ec6bb2025-04-24 10:49:16 -070066 gitCommitDescription := fmt.Sprintf(`Create git commits for any code changes you made. Match the style of recent commit messages. Include 'Co-Authored-By: sketch <hello@sketch.dev>' and the original user prompt. Use GIT_AUTHOR_NAME="%s" GIT_AUTHOR_EMAIL="%s" (not git config).`,
Earl Lee2e463fb2025-04-17 11:22:22 -070067 gitUsername, gitEmail)
68 desc, err := json.Marshal(gitCommitDescription)
69 if err != nil {
70 panic(err)
71 }
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070072 prefix := doneChecklistJSONSchemaPrefix
73 suffix := doneChecklistJSONSchemaSuffix
74 if experiment.Enabled("not_done") {
75 prefix = backtrackDoneChecklistJSONSchemaPrefix
76 suffix = backtrackDoneChecklistJSONSchemaSuffix
77 }
78 return prefix + string(desc) + suffix
Earl Lee2e463fb2025-04-17 11:22:22 -070079}
80
// Default description and input schema for the done tool. The schema is kept
// as two raw JSON fragments: doneChecklistJSONSchema concatenates the prefix,
// a JSON-encoded string that becomes the "git_commit" description, and the
// suffix to form the complete schema document.
81// TODO: this is ugly, maybe JSON-encode a deeply nested map[string]any instead? also ugly.
82const (
// doneDescription is the tool description used by makeDoneTool unless the
// "not_done" experiment is enabled.
Josh Bleecher Snyderd7e56382025-05-05 13:22:08 -070083 doneDescription = `Use this tool when you have achieved the user's goal. The parameters form a checklist which you should evaluate.`
// doneChecklistJSONSchemaPrefix is the schema text up to, but not including,
// the value of the "git_commit" item's "description" key.
Earl Lee2e463fb2025-04-17 11:22:22 -070084 doneChecklistJSONSchemaPrefix = `{
85 "$schema": "http://json-schema.org/draft-07/schema#",
86 "title": "Checklist",
87 "description": "A schema for tracking checklist items with status and comments",
88 "type": "object",
89 "required": ["checklist_items"],
90 "properties": {
91 "checklist_items": {
92 "type": "object",
93 "description": "Collection of checklist items",
94 "properties": {
95 "wrote_tests": {
96 "$ref": "#/definitions/checklistItem",
97 "description": "If code was changed, tests were written or updated."
98 },
99 "passes_tests": {
100 "$ref": "#/definitions/checklistItem",
101 "description": "If any commits were made, tests pass."
102 },
103 "code_reviewed": {
104 "$ref": "#/definitions/checklistItem",
105 "description": "If any commits were made, the codereview tool was run and its output was addressed."
106 },
107 "git_commit": {
108 "$ref": "#/definitions/checklistItem",
109 "description": `
110
// doneChecklistJSONSchemaSuffix closes the "git_commit" item and the
// "checklist_items" object, then supplies the shared "checklistItem"
// definition that every item refers to.
111 doneChecklistJSONSchemaSuffix = `
Josh Bleecher Snyderc6a2c242025-04-22 18:04:16 -0700112 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700113 },
114 "additionalProperties": {
115 "$ref": "#/definitions/checklistItem"
116 }
117 }
118 },
119 "definitions": {
120 "checklistItem": {
121 "type": "object",
122 "required": ["status"],
123 "properties": {
124 "status": {
125 "type": "string",
126 "description": "Current status of the checklist item",
127 "enum": ["yes", "no", "not applicable", "other"]
128 },
129 "description": {
130 "type": "string",
131 "description": "Description of what this checklist item verifies"
132 },
133 "comments": {
134 "type": "string",
135 "description": "Additional comments or context for this checklist item"
136 }
137 }
138 }
139 }
140}`
141)
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -0700142
// Description and input schema for the done tool when the "not_done"
// experiment is enabled. As with the default variant, doneChecklistJSONSchema
// joins the prefix, a JSON-encoded "git_commit" description, and the suffix.
// In this variant each checklist item requires "thoughts" and "status", and
// "status" may be "cancel"; makeDoneTool's Run function checks the input for
// cancellation before doing any git checks.
143const (
// backtrackDoneDescription is the tool description used by makeDoneTool when
// the "not_done" experiment is enabled.
// NOTE(review): the text mentions the status "not applicable", but the enum in
// backtrackDoneChecklistJSONSchemaSuffix spells it "n/a" — confirm which is intended.
144 backtrackDoneDescription = `This tool marks task completion. Review the checklist items carefully - if any item's status is "cancel" or any thoughts contain "Cancel done tool call", the entire call will be ignored without user notification. Cancellation is free and preferred over inaccurately marking items as "done" or "not applicable".`
// backtrackDoneChecklistJSONSchemaPrefix is the schema text up to, but not
// including, the value of the "git_commit" item's "description" key.
145 backtrackDoneChecklistJSONSchemaPrefix = `{
146 "$schema": "http://json-schema.org/draft-07/schema#",
147 "title": "Checklist",
148 "description": "A schema for tracking checklist items",
149 "type": "object",
150 "required": ["checklist_items"],
151 "properties": {
152 "checklist_items": {
153 "type": "object",
154 "description": "Collection of checklist items",
155 "properties": {
156 "wrote_tests": {
157 "$ref": "#/definitions/checklistItem",
158 "description": "If code was changed, tests were written or updated."
159 },
160 "passes_tests": {
161 "$ref": "#/definitions/checklistItem",
162 "description": "If any commits were made, tests pass."
163 },
164 "code_reviewed": {
165 "$ref": "#/definitions/checklistItem",
166 "description": "If any commits were made, the codereview tool was run and its output was addressed."
167 },
168 "git_commit": {
169 "$ref": "#/definitions/checklistItem",
170 "description": `
171
// backtrackDoneChecklistJSONSchemaSuffix closes the "git_commit" item and the
// "checklist_items" object, then supplies the "checklistItem" definition with
// its required "thoughts" and "status" fields.
172 backtrackDoneChecklistJSONSchemaSuffix = `
173 }
174 },
175 "additionalProperties": {
176 "$ref": "#/definitions/checklistItem"
177 }
178 }
179 },
180 "definitions": {
181 "checklistItem": {
182 "type": "object",
183 "required": ["thoughts", "status"],
184 "properties": {
185 "thoughts": {
186 "type": "string",
187 "description": "Reflections on this item's status - be honest about any issues"
188 },
189 "status": {
190 "type": "string",
191 "description": "Current status - when in doubt, cancel",
192 "enum": ["done", "cancel", "n/a"]
193 }
194 }
195 }
196 }
197}`
198)