loop: add todo checklist
This should improve Sketch's executive function and user communication.
diff --git a/claudetool/shared.go b/claudetool/shared.go
index d95a5e4..310044c 100644
--- a/claudetool/shared.go
+++ b/claudetool/shared.go
@@ -23,3 +23,16 @@
wd, _ := ctx.Value(workingDirCtxKey).(string)
return wd
}
+
+type sessionIDCtxKeyType string // unexported key type: other packages cannot name it, so their context keys cannot collide with the one below
+
+const sessionIDCtxKey sessionIDCtxKeyType = "sessionID" // key under which WithSessionID stores the session ID in a context
+// WithSessionID returns a context derived from ctx that carries sessionID; SessionID reads it back.
+func WithSessionID(ctx context.Context, sessionID string) context.Context {
+ return context.WithValue(ctx, sessionIDCtxKey, sessionID)
+}
+// SessionID returns the session ID that WithSessionID stored in ctx, or "" if there is none.
+func SessionID(ctx context.Context) string {
+	id, _ := ctx.Value(sessionIDCtxKey).(string) // a failed assertion leaves id as ""
+	return id
+}
diff --git a/claudetool/todo.go b/claudetool/todo.go
new file mode 100644
index 0000000..64c7550
--- /dev/null
+++ b/claudetool/todo.go
@@ -0,0 +1,176 @@
+package claudetool
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "sketch.dev/llm"
+)
+// TodoRead is the todo_read tool definition; calls to it are handled by todoReadRun.
+var TodoRead = &llm.Tool{
+ Name: "todo_read",
+ Description: `Reads the current todo list. Use frequently to track progress and understand what's pending.`,
+ InputSchema: llm.EmptySchema(),
+ Run: todoReadRun,
+}
+// TodoWrite is the todo_write tool definition; calls to it are handled by todoWriteRun.
+var TodoWrite = &llm.Tool{
+ Name: "todo_write",
+ Description: todoWriteDescription,
+ InputSchema: llm.MustSchema(todoWriteInputSchema),
+ Run: todoWriteRun,
+}
+// Description text and JSON input schema that the TodoWrite tool definition passes to the llm package.
+const (
+ todoWriteDescription = `todo_write: Creates and manages a structured task list for tracking work and communicating progress to users. Use early and often.
+
+Use for:
+- multi-step tasks
+- complex work
+- when users provide multiple requests
+- conversations that start trivial but grow in scope
+- when users request additional work (directly or via feedback)
+
+Skip for:
+- trivial single-step tasks
+- purely conversational exchanges
+
+Update dynamically as work evolves - conversations can spawn tasks, simple tasks can become complex, and new discoveries may require additional work.
+
+Rules:
+- Update immediately when task states or task list changes
+- Only one task "in-progress" at any time
+- Each update completely replaces the task list - include all tasks (past and present)
+- Never modify or delete completed tasks
+- Queued and in-progress tasks may be restructured as understanding evolves
+- Tasks should be atomic, clear, precise, and actionable
+- If the user adds new tasks: append, don't replace
+`
+
+ todoWriteInputSchema = `
+{
+ "type": "object",
+ "required": ["tasks"],
+ "properties": {
+ "tasks": {
+ "type": "array",
+ "description": "Array of tasks to write",
+ "items": {
+ "type": "object",
+ "required": ["id", "task", "status"],
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "stable, unique hyphenated slug"
+ },
+ "task": {
+ "type": "string",
+ "description": "actionable step in active tense, sentence case, plain text only, displayed to user"
+ },
+ "status": {
+ "type": "string",
+ "enum": ["queued", "in-progress", "completed"],
+ "description": "current task status"
+ }
+ }
+ }
+ }
+ }
+}
+`
+)
+// TodoItem is a single task in the todo list.
+type TodoItem struct {
+ ID string `json:"id"` // described to the model as a "stable, unique hyphenated slug"
+ Task string `json:"task"` // task text; the schema notes it is displayed to the user
+ Status string `json:"status"` // schema allows "queued", "in-progress" or "completed"
+}
+// TodoList is the JSON document kept in the todo file: todoWriteRun marshals it and todoReadRun unmarshals it.
+type TodoList struct {
+ Items []TodoItem `json:"items"`
+}
+// TodoWriteInput is the argument object of todo_write, as decoded by todoWriteRun.
+type TodoWriteInput struct {
+ Tasks []TodoItem `json:"tasks"` // replaces the stored list: todoWriteRun writes exactly these items
+}
+
+// TodoFilePath returns the todo file path for sessionID; an empty sessionID selects a shared fallback file.
+func TodoFilePath(sessionID string) string {
+	if sessionID != "" {
+		return filepath.Join("/tmp", sessionID, "todos.json")
+	}
+	return "/tmp/sketch_todos.json"
+}
+
+func todoFilePathForContext(ctx context.Context) string { // todo file path for the session ID carried by ctx ("" if none)
+ return TodoFilePath(SessionID(ctx))
+}
+
+// todoReadRun implements todo_read: it loads the session's todo file and renders it as a <todo_list> element.
+// A missing file is not an error; the reply then tells the model to create a list with todo_write.
+func todoReadRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
+	raw, err := os.ReadFile(todoFilePathForContext(ctx))
+	switch {
+	case os.IsNotExist(err):
+		return llm.TextContent("No todo list found. Use todo_write to create one."), nil
+	case err != nil:
+		return nil, fmt.Errorf("failed to read todo file: %w", err)
+	}
+
+	var stored TodoList
+	if err := json.Unmarshal(raw, &stored); err != nil {
+		return nil, fmt.Errorf("failed to parse todo file: %w", err)
+	}
+
+	// Build the reply in a byte slice instead of repeated string concatenation.
+	out := []byte(fmt.Sprintf(`<todo_list count="%d">%s`, len(stored.Items), "\n"))
+	for _, it := range stored.Items {
+		out = append(out, fmt.Sprintf(` <task id="%s" status="%s">%s</task>%s`, it.ID, it.Status, it.Task, "\n")...)
+	}
+	return llm.TextContent(string(out) + "</todo_list>"), nil
+}
+
+// todoWriteRun implements todo_write. It decodes the submitted tasks from m, rejects
+// input that marks more than one task "in-progress", and then overwrites the
+// session's todo file (see todoFilePathForContext) with the new list.
+//
+// On success it returns a one-line summary of how many items were written.
+func todoWriteRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
+	var req TodoWriteInput
+	if err := json.Unmarshal(m, &req); err != nil {
+		return nil, fmt.Errorf("invalid input: %w", err)
+	}
+
+	// Enforce the "only one task in-progress" rule before anything is written.
+	active := 0
+	for i := range req.Tasks {
+		if req.Tasks[i].Status == "in-progress" {
+			active++
+		}
+	}
+	if active > 1 {
+		return nil, fmt.Errorf("only one task can be 'in-progress' at a time, found %d", active)
+	}
+
+	// The per-session directory may not exist yet; create it with owner-only permissions.
+	file := todoFilePathForContext(ctx)
+	if err := os.MkdirAll(filepath.Dir(file), 0o700); err != nil {
+		return nil, fmt.Errorf("failed to create todo directory: %w", err)
+	}
+
+	// The stored list is replaced wholesale by the submitted tasks.
+	data, err := json.Marshal(TodoList{Items: req.Tasks})
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal todo list: %w", err)
+	}
+
+	if err := os.WriteFile(file, data, 0o600); err != nil {
+		return nil, fmt.Errorf("failed to write todo file: %w", err)
+	}
+
+	summary := fmt.Sprintf("Updated todo list with %d items.", len(req.Tasks))
+	return llm.TextContent(summary), nil
+}
diff --git a/claudetool/todo_test.go b/claudetool/todo_test.go
new file mode 100644
index 0000000..ac36cc2
--- /dev/null
+++ b/claudetool/todo_test.go
@@ -0,0 +1,155 @@
+package claudetool
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// TestTodoReadEmpty checks that todo_read answers with a hint, not an error,
+// when the session has no todo file.
+func TestTodoReadEmpty(t *testing.T) {
+	ctx := WithSessionID(context.Background(), "test-session-1")
+
+	// Start from a clean slate: the session must not have a todo file.
+	os.Remove(todoFilePathForContext(ctx))
+
+	got, err := todoReadRun(ctx, []byte("{}"))
+	if err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+	if n := len(got); n != 1 {
+		t.Fatalf("expected 1 content item, got %d", n)
+	}
+
+	const want = "No todo list found. Use todo_write to create one."
+	if text := got[0].Text; text != want {
+		t.Errorf("expected %q, got %q", want, text)
+	}
+}
+
+// TestTodoWriteAndRead writes a three-item list with todo_write and checks that
+// todo_read renders every item back inside a <todo_list> element.
+func TestTodoWriteAndRead(t *testing.T) {
+	ctx := WithSessionID(context.Background(), "test-session-2")
+
+	// Start clean, and on exit remove both the todo file and the per-session
+	// directory that todoWriteRun creates, so nothing is left behind in /tmp.
+	todoPath := todoFilePathForContext(ctx)
+	os.Remove(todoPath)
+	defer func() {
+		os.Remove(todoPath)
+		os.Remove(filepath.Dir(todoPath)) // non-recursive: only succeeds once the directory is empty
+	}()
+
+	// Write some todos
+	todos := []TodoItem{
+		{ID: "1", Task: "Implement todo tools", Status: "completed"},
+		{ID: "2", Task: "Update system prompt", Status: "in-progress"},
+		{ID: "3", Task: "Write tests", Status: "queued"},
+	}
+	writeInputJSON, err := json.Marshal(TodoWriteInput{Tasks: todos})
+	if err != nil {
+		t.Fatalf("failed to marshal write input: %v", err)
+	}
+
+	result, err := todoWriteRun(ctx, writeInputJSON)
+	if err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+	if len(result) != 1 {
+		t.Fatalf("expected 1 content item, got %d", len(result))
+	}
+	if expected := "Updated todo list with 3 items."; result[0].Text != expected {
+		t.Errorf("expected %q, got %q", expected, result[0].Text)
+	}
+
+	// Read the todos back
+	result, err = todoReadRun(ctx, []byte("{}"))
+	if err != nil {
+		t.Fatalf("expected no error, got %v", err)
+	}
+	if len(result) != 1 {
+		t.Fatalf("expected 1 content item, got %d", len(result))
+	}
+	resultText := result[0].Text
+	if !strings.Contains(resultText, "<todo_list count=\"3\">") {
+		t.Errorf("expected result to contain XML todo list header, got %q", resultText)
+	}
+	// Check that all todos are present with proper XML structure
+	if !strings.Contains(resultText, `<task id="1" status="completed">Implement todo tools</task>`) {
+		t.Errorf("expected result to contain first todo in XML format, got %q", resultText)
+	}
+	if !strings.Contains(resultText, `<task id="2" status="in-progress">Update system prompt</task>`) {
+		t.Errorf("expected result to contain second todo in XML format, got %q", resultText)
+	}
+	if !strings.Contains(resultText, `<task id="3" status="queued">Write tests</task>`) {
+		t.Errorf("expected result to contain third todo in XML format, got %q", resultText)
+	}
+	// Check XML structure
+	if !strings.Contains(resultText, "</todo_list>") {
+		t.Errorf("expected result to contain closing XML tag, got %q", resultText)
+	}
+}
+
+// TestTodoWriteMultipleInProgress checks that todo_write rejects a list in which
+// more than one task is marked "in-progress".
+func TestTodoWriteMultipleInProgress(t *testing.T) {
+	ctx := WithSessionID(context.Background(), "test-session-3")
+
+	// Two tasks claim to be in progress at once.
+	writeInputJSON, err := json.Marshal(TodoWriteInput{Tasks: []TodoItem{
+		{ID: "1", Task: "Task 1", Status: "in-progress"},
+		{ID: "2", Task: "Task 2", Status: "in-progress"},
+	}})
+	if err != nil {
+		t.Fatalf("failed to marshal write input: %v", err)
+	}
+
+	if _, err = todoWriteRun(ctx, writeInputJSON); err == nil {
+		t.Fatal("expected error for multiple in-progress tasks, got none")
+	}
+	const expected = "only one task can be 'in-progress' at a time, found 2"
+	if err.Error() != expected {
+		t.Errorf("expected error %q, got %q", expected, err.Error())
+	}
+}
+
+// TestTodoSessionIsolation checks that different session IDs resolve to different
+// todo files, each at /tmp/<session-id>/todos.json.
+func TestTodoSessionIsolation(t *testing.T) {
+	// pathFor resolves the todo file for a context carrying the given session ID.
+	pathFor := func(id string) string {
+		return todoFilePathForContext(WithSessionID(context.Background(), id))
+	}
+
+	path1, path2 := pathFor("session-1"), pathFor("session-2")
+
+	if path1 == path2 {
+		t.Errorf("expected different paths for different sessions, both got %q", path1)
+	}
+
+	// Each path must also have the exact per-session layout.
+	if expected1 := filepath.Join("/tmp", "session-1", "todos.json"); path1 != expected1 {
+		t.Errorf("expected path1 %q, got %q", expected1, path1)
+	}
+
+	if expected2 := filepath.Join("/tmp", "session-2", "todos.json"); path2 != expected2 {
+		t.Errorf("expected path2 %q, got %q", expected2, path2)
+	}
+}
+
+// TestTodoFallbackPath checks which file is used when the context carries no
+// session ID.
+func TestTodoFallbackPath(t *testing.T) {
+	const expected = "/tmp/sketch_todos.json"
+
+	// context.Background() has no session ID attached.
+	got := todoFilePathForContext(context.Background())
+	if got != expected {
+		t.Errorf("expected fallback path %q, got %q", expected, got)
+	}
+}