blob: 0870fb179f9b0cebef95a7fafcb2f6bfcfb082ab [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -07007 "strings"
Earl Lee2e463fb2025-04-17 11:22:22 -07008
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +00009 "sketch.dev/claudetool/codereview"
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070010 "sketch.dev/experiment"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070011 "sketch.dev/llm"
Earl Lee2e463fb2025-04-17 11:22:22 -070012)
13
14// makeDoneTool creates a tool that provides a checklist to the agent. There
15// are some duplicative instructions here and in the system prompt, and it's
16// not as reliable as it could be. Historically, we've found that Claude ignores
17// the tool results here, so we don't tell the tool to say "hey, really check this"
18// at the moment, though we've tried.
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000019func makeDoneTool(codereview *codereview.CodeReviewer) *llm.Tool {
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070020 description := doneDescription
21 if experiment.Enabled("not_done") {
22 description = backtrackDoneDescription
23 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070024 return &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070025 Name: "done",
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070026 Description: description,
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000027 InputSchema: json.RawMessage(doneChecklistSchema()),
Earl Lee2e463fb2025-04-17 11:22:22 -070028 Run: func(ctx context.Context, input json.RawMessage) (string, error) {
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070029 if experiment.Enabled("not_done") {
30 if strings.Contains(strings.ToLower(string(input)), "cancel done tool call") {
31 return "", fmt.Errorf("cancelled")
32 }
33 m := make(map[string]struct {
34 Status string
35 })
36 if err := json.Unmarshal(input, &m); err != nil {
37 return "", err
38 }
39 for _, checklist := range m {
40 if checklist.Status == "cancel" {
41 return "", fmt.Errorf("cancelled")
42 }
43 }
44 }
Earl Lee2e463fb2025-04-17 11:22:22 -070045 // Cannot be done with a messy git.
46 if err := codereview.RequireNormalGitState(ctx); err != nil {
47 return "", err
48 }
49 if err := codereview.RequireNoUncommittedChanges(ctx); err != nil {
50 return "", err
51 }
52 // Ensure that the current commit has been reviewed.
53 head, err := codereview.CurrentCommit(ctx)
54 if err == nil {
55 needsReview := !codereview.IsInitialCommit(head) && !codereview.HasReviewed(head)
56 if needsReview {
57 return "", fmt.Errorf("codereview tool has not been run for commit %v", head)
58 }
59 }
60 return `Please ask the user to review your work. Be concise - users are more likely to read shorter comments.`, nil
61 },
62 }
63}
64
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000065func doneChecklistSchema() string {
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070066 if experiment.Enabled("not_done") {
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000067 return backtrackDoneChecklistJSONSchema
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -070068 }
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000069 return doneChecklistJSONSchema
Earl Lee2e463fb2025-04-17 11:22:22 -070070}
71
// Default description and input schema for the "done" tool. makeDoneTool and
// doneChecklistSchema use these when the "not_done" experiment is not enabled.
72// TODO: this is ugly, maybe JSON-encode a deeply nested map[string]any instead? also ugly.
73const (
	// doneDescription is the text assigned to the done tool's Description.
Josh Bleecher Snyder93202652025-05-08 02:05:57 +000074 doneDescription = `Use this tool when you have achieved the user's goal. The parameters form a checklist which you should evaluate.`
	// doneChecklistJSONSchema is a draft-07 JSON Schema for the done tool's
	// input. It requires a top-level "checklist_items" object. The named
	// entries (wrote_tests, passes_tests, code_reviewed, git_commit) and any
	// additional entries are all checklistItem objects. Each item must carry a
	// "status" of "yes", "no", "not applicable", or "other", and may include
	// free-form "description" and "comments" strings.
75 doneChecklistJSONSchema = `{
Earl Lee2e463fb2025-04-17 11:22:22 -070076 "$schema": "http://json-schema.org/draft-07/schema#",
77 "title": "Checklist",
78 "description": "A schema for tracking checklist items with status and comments",
79 "type": "object",
80 "required": ["checklist_items"],
81 "properties": {
82 "checklist_items": {
83 "type": "object",
84 "description": "Collection of checklist items",
85 "properties": {
86 "wrote_tests": {
87 "$ref": "#/definitions/checklistItem",
88 "description": "If code was changed, tests were written or updated."
89 },
90 "passes_tests": {
91 "$ref": "#/definitions/checklistItem",
92 "description": "If any commits were made, tests pass."
93 },
94 "code_reviewed": {
95 "$ref": "#/definitions/checklistItem",
96 "description": "If any commits were made, the codereview tool was run and its output was addressed."
97 },
98 "git_commit": {
99 "$ref": "#/definitions/checklistItem",
Josh Bleecher Snyder93202652025-05-08 02:05:57 +0000100 "description": "Create git commits for any code changes you made, adding --trailer 'Co-Authored-By: sketch <hello@sketch.dev>' and --trailer 'Change-ID: s$(openssl rand -hex 8)k'. The git user is already configured correctly."
Josh Bleecher Snyderc6a2c242025-04-22 18:04:16 -0700101 }
Earl Lee2e463fb2025-04-17 11:22:22 -0700102 },
103 "additionalProperties": {
104 "$ref": "#/definitions/checklistItem"
105 }
106 }
107 },
108 "definitions": {
109 "checklistItem": {
110 "type": "object",
111 "required": ["status"],
112 "properties": {
113 "status": {
114 "type": "string",
115 "description": "Current status of the checklist item",
116 "enum": ["yes", "no", "not applicable", "other"]
117 },
118 "description": {
119 "type": "string",
120 "description": "Description of what this checklist item verifies"
121 },
122 "comments": {
123 "type": "string",
124 "description": "Additional comments or context for this checklist item"
125 }
126 }
127 }
128 }
129}`
130)
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -0700131
// Description and input schema for the "done" tool when the "not_done"
// experiment is enabled. makeDoneTool and doneChecklistSchema select these
// in place of the default description and schema.
132const (
	// backtrackDoneDescription is the text assigned to the done tool's
	// Description under the experiment. It tells the model that a "cancel"
	// status or the phrase "Cancel done tool call" aborts the call.
	// NOTE(review): the text refers to a "not applicable" status, but the
	// schema's status enum below spells that value "n/a" — confirm which
	// wording is intended.
Josh Bleecher Snyder93202652025-05-08 02:05:57 +0000133 backtrackDoneDescription = `This tool marks task completion. Review the checklist items carefully - if any item's status is "cancel" or any thoughts contain "Cancel done tool call", the entire call will be ignored without user notification. Cancellation is free and preferred over inaccurately marking items as "done" or "not applicable".`
	// backtrackDoneChecklistJSONSchema is a draft-07 JSON Schema for the done
	// tool's input under the experiment. It requires a top-level
	// "checklist_items" object. The named entries (wrote_tests, passes_tests,
	// code_reviewed, git_commit) and any additional entries are all
	// checklistItem objects. Each item must carry a "thoughts" string and a
	// "status" of "done", "cancel", or "n/a".
134 backtrackDoneChecklistJSONSchema = `{
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -0700135 "$schema": "http://json-schema.org/draft-07/schema#",
136 "title": "Checklist",
137 "description": "A schema for tracking checklist items",
138 "type": "object",
139 "required": ["checklist_items"],
140 "properties": {
141 "checklist_items": {
142 "type": "object",
143 "description": "Collection of checklist items",
144 "properties": {
145 "wrote_tests": {
146 "$ref": "#/definitions/checklistItem",
147 "description": "If code was changed, tests were written or updated."
148 },
149 "passes_tests": {
150 "$ref": "#/definitions/checklistItem",
151 "description": "If any commits were made, tests pass."
152 },
153 "code_reviewed": {
154 "$ref": "#/definitions/checklistItem",
155 "description": "If any commits were made, the codereview tool was run and its output was addressed."
156 },
157 "git_commit": {
158 "$ref": "#/definitions/checklistItem",
Josh Bleecher Snyder93202652025-05-08 02:05:57 +0000159 "description": "Create git commits for any code changes you made, adding --trailer 'Co-Authored-By: sketch <hello@sketch.dev>' and --trailer 'Change-ID: s$(openssl rand -hex 8)k'. The git user is already configured correctly."
Josh Bleecher Snyder503b5e32025-05-05 13:30:55 -0700160 }
161 },
162 "additionalProperties": {
163 "$ref": "#/definitions/checklistItem"
164 }
165 }
166 },
167 "definitions": {
168 "checklistItem": {
169 "type": "object",
170 "required": ["thoughts", "status"],
171 "properties": {
172 "thoughts": {
173 "type": "string",
174 "description": "Reflections on this item's status - be honest about any issues"
175 },
176 "status": {
177 "type": "string",
178 "description": "Current status - when in doubt, cancel",
179 "enum": ["done", "cancel", "n/a"]
180 }
181 }
182 }
183 }
184}`
185)