blob: a99e3cd1a54d902045fdc92e39c0fc90f9b8163f [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "context"
5 _ "embed"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "os/exec"
10 "strings"
11
12 "sketch.dev/ant"
13)
14
// Keyword is the keyword_search tool. Its name, description, and input schema
// come from the keyword* constants below, and invocations are handled by keywordRun.
// TODO: use an embedding model + re-ranker or otherwise do something nicer than this kludge.
// TODO: if we can get this fast enough, do it on the fly while the user is typing their prompt.
var Keyword = &ant.Tool{
	Name:        keywordName,
	Description: keywordDescription,
	InputSchema: ant.MustSchema(keywordInputSchema),
	Run:         keywordRun,
}
24
// Static metadata for the keyword_search tool, consumed by the Keyword tool definition.
const (
	// keywordName is the tool name used for the Keyword tool.
	keywordName = "keyword_search"
	// keywordDescription is the usage guidance attached to the Keyword tool.
	keywordDescription = `
keyword_search locates files with a search-and-filter approach.
Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions.

Effective use:
- Provide a detailed query for accurate relevance ranking
- Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall)
- Order search terms by importance (most important first)
- Supports regex search terms for flexible matching

IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, filenames, symbols, or package names. Use direct approaches (grep, cat, go doc, etc.) instead.
`

	// keywordInputSchema is the JSON schema for the tool input. Its two required
	// properties correspond to the json tags on keywordInput.
	// If you modify this, update the termui template for prettier rendering.
	keywordInputSchema = `
{
  "type": "object",
  "required": [
    "query",
    "search_terms"
  ],
  "properties": {
    "query": {
      "type": "string",
      "description": "A detailed statement of what you're trying to find or learn."
    },
    "search_terms": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of search terms in descending order of importance."
    }
  }
}
`
)
64
// keywordInput is the decoded form of the JSON input passed to keywordRun.
// Its json tags match the property names declared in keywordInputSchema.
type keywordInput struct {
	// Query is forwarded verbatim to the relevance-filtering conversation.
	Query string `json:"query"`
	// SearchTerms are passed to ripgrep as patterns; keywordRun drops terms
	// from the end of the list first when the combined results are too large.
	SearchTerms []string `json:"search_terms"`
}
69
// keywordSystemPrompt holds the contents of keyword_system_prompt.txt, embedded
// at build time. keywordRun trims it and uses it as the system prompt of the
// sub-conversation that filters search results.
//
//go:embed keyword_system_prompt.txt
var keywordSystemPrompt string
72
// findRepoRoot asks git for the top-level directory of the repository that
// contains wd by running `git rev-parse --show-toplevel` with wd as the
// command's working directory. The output is returned with surrounding
// whitespace trimmed. Any failure to run the command is returned as a
// wrapped error.
func findRepoRoot(wd string) (string, error) {
	// todo: cwd here and throughout
	git := exec.Command("git", "rev-parse", "--show-toplevel")
	git.Dir = wd

	stdout, err := git.Output()
	if err != nil {
		return "", fmt.Errorf("failed to find git repository root: %w", err)
	}

	root := strings.TrimSpace(string(stdout))
	return root, nil
}
84
85func keywordRun(ctx context.Context, m json.RawMessage) (string, error) {
86 var input keywordInput
87 if err := json.Unmarshal(m, &input); err != nil {
88 return "", err
89 }
90 wd := WorkingDir(ctx)
91 root, err := findRepoRoot(wd)
92 if err == nil {
93 wd = root
94 }
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -070095 slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd)
Earl Lee2e463fb2025-04-17 11:22:22 -070096
97 // first remove stopwords
98 var keep []string
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -070099 for _, term := range input.SearchTerms {
Earl Lee2e463fb2025-04-17 11:22:22 -0700100 out, err := ripgrep(ctx, wd, []string{term})
101 if err != nil {
102 return "", err
103 }
104 if len(out) > 64*1024 {
105 slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out))
106 continue
107 }
108 keep = append(keep, term)
109 }
110
111 // peel off keywords until we get a result that fits in the query window
112 var out string
113 for {
114 var err error
115 out, err = ripgrep(ctx, wd, keep)
116 if err != nil {
117 return "", err
118 }
119 if len(out) < 128*1024 {
120 break
121 }
122 keep = keep[:len(keep)-1]
123 }
124
125 info := ant.ToolCallInfoFromContext(ctx)
126 convo := info.Convo.SubConvo()
127 convo.SystemPrompt = strings.TrimSpace(keywordSystemPrompt)
128
129 initialMessage := ant.Message{
130 Role: ant.MessageRoleUser,
131 Content: []ant.Content{
132 ant.StringContent("<pwd>\n" + wd + "\n</pwd>"),
133 ant.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"),
134 ant.StringContent("<query>\n" + input.Query + "\n</query>"),
135 },
136 }
137
138 resp, err := convo.SendMessage(initialMessage)
139 if err != nil {
140 return "", fmt.Errorf("failed to send relevance filtering message: %w", err)
141 }
142 if len(resp.Content) != 1 {
143 return "", fmt.Errorf("unexpected number of messages in relevance filtering response: %d", len(resp.Content))
144 }
145
146 filtered := resp.Content[0].Text
147
148 slog.InfoContext(ctx, "keyword search results processed",
149 "bytes", len(out),
150 "lines", strings.Count(out, "\n"),
151 "files", strings.Count(out, "\n\n"),
152 "query", input.Query,
153 "filtered", filtered,
154 )
155
156 return resp.Content[0].Text, nil
157}
158
// ripgrep runs rg in directory wd with every entry of terms passed as a
// separate -e pattern, and returns the command's combined stdout and stderr.
// The search is invoked with -C 10, -i, --line-number and --with-filename.
//
// When rg exits with code 1 (no matches) the literal string "no matches found"
// is returned with a nil error. An empty terms slice is rejected up front,
// since rg cannot run without a pattern. Any other failure is returned as an
// error that wraps the underlying cause and includes the command output.
func ripgrep(ctx context.Context, wd string, terms []string) (string, error) {
	if len(terms) == 0 {
		return "", fmt.Errorf("search failed: no search terms provided")
	}

	args := make([]string, 0, 5+2*len(terms))
	args = append(args, "-C", "10", "-i", "--line-number", "--with-filename")
	for _, term := range terms {
		// -e keeps terms that begin with '-' from being parsed as flags.
		args = append(args, "-e", term)
	}
	cmd := exec.CommandContext(ctx, "rg", args...)
	cmd.Dir = wd
	out, err := cmd.CombinedOutput()
	if err != nil {
		// ripgrep returns exit code 1 when no matches are found, which is not an error for us
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
			return "no matches found", nil
		}
		// %w keeps the cause (for example context cancellation) inspectable
		// by callers via errors.Is / errors.As.
		return "", fmt.Errorf("search failed: %w\n%s", err, out)
	}
	return string(out), nil
}