blob: ff20d13cc899efaa6579c54698018b8f37be2efa [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "context"
5 _ "embed"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "os/exec"
10 "strings"
11
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070012 "sketch.dev/llm"
13 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070014)
15
16// The Keyword tool provides keyword search.
17// TODO: use an embedding model + re-ranker or otherwise do something nicer than this kludge.
18// TODO: if we can get this fast enough, do it on the fly while the user is typing their prompt.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070019var Keyword = &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070020 Name: keywordName,
21 Description: keywordDescription,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070022 InputSchema: llm.MustSchema(keywordInputSchema),
Earl Lee2e463fb2025-04-17 11:22:22 -070023 Run: keywordRun,
24}
25
// Name, description, and JSON input schema advertised for the keyword_search tool.
const (
	keywordName        = "keyword_search"
	keywordDescription = `
keyword_search locates files with a search-and-filter approach.
Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions.

Effective use:
- Provide a detailed query for accurate relevance ranking
- Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall)
- Order search terms by importance (most important first)
- Supports regex search terms for flexible matching

IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, stack traces, filenames, or symbols. Use direct approaches (rg, cat, etc.) instead.
`

	// If you modify this, update the termui template for prettier rendering.
	keywordInputSchema = `
{
  "type": "object",
  "required": [
    "query",
    "search_terms"
  ],
  "properties": {
    "query": {
      "type": "string",
      "description": "A detailed statement of what you're trying to find or learn."
    },
    "search_terms": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of search terms in descending order of importance."
    }
  }
}
`
)
65
// keywordInput is the decoded JSON input of a keyword_search call.
// Its fields correspond to the "query" and "search_terms" properties of
// keywordInputSchema.
type keywordInput struct {
	// Query is a detailed statement of what the caller is trying to find or learn.
	Query string `json:"query"`
	// SearchTerms lists the search terms in descending order of importance.
	SearchTerms []string `json:"search_terms"`
}
70
71//go:embed keyword_system_prompt.txt
72var keywordSystemPrompt string
73
// FindRepoRoot returns the root of the git repository that contains wd.
//
// It runs "git rev-parse --show-toplevel" with wd as the command's working
// directory and returns the command's output with surrounding whitespace
// trimmed. If the command cannot be run or exits unsuccessfully, the
// underlying error is returned wrapped.
func FindRepoRoot(wd string) (string, error) {
	cmd := exec.Command("git", "rev-parse", "--show-toplevel")
	cmd.Dir = wd
	out, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("failed to find git repository root: %w", err)
	}
	return strings.TrimSpace(string(out)), nil
}
85
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070086func keywordRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
Earl Lee2e463fb2025-04-17 11:22:22 -070087 var input keywordInput
88 if err := json.Unmarshal(m, &input); err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -070089 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -070090 }
91 wd := WorkingDir(ctx)
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000092 root, err := FindRepoRoot(wd)
Earl Lee2e463fb2025-04-17 11:22:22 -070093 if err == nil {
94 wd = root
95 }
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -070096 slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd)
Earl Lee2e463fb2025-04-17 11:22:22 -070097
98 // first remove stopwords
99 var keep []string
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -0700100 for _, term := range input.SearchTerms {
Earl Lee2e463fb2025-04-17 11:22:22 -0700101 out, err := ripgrep(ctx, wd, []string{term})
102 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700103 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700104 }
105 if len(out) > 64*1024 {
106 slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out))
107 continue
108 }
109 keep = append(keep, term)
110 }
111
Josh Bleecher Snyder160fb062025-05-22 21:04:22 -0700112 if len(keep) == 0 {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700113 return llm.ToolOut{LLMContent: llm.TextContent("each of those search terms yielded too many results")}
Josh Bleecher Snyder160fb062025-05-22 21:04:22 -0700114 }
115
Earl Lee2e463fb2025-04-17 11:22:22 -0700116 // peel off keywords until we get a result that fits in the query window
117 var out string
118 for {
119 var err error
120 out, err = ripgrep(ctx, wd, keep)
121 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700122 return llm.ErrorToolOut(err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700123 }
124 if len(out) < 128*1024 {
125 break
126 }
127 keep = keep[:len(keep)-1]
128 }
129
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700130 info := conversation.ToolCallInfoFromContext(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700131 convo := info.Convo.SubConvo()
132 convo.SystemPrompt = strings.TrimSpace(keywordSystemPrompt)
Josh Bleecher Snyder593ca642025-05-07 05:19:32 -0700133 convo.PromptCaching = false
Earl Lee2e463fb2025-04-17 11:22:22 -0700134
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700135 initialMessage := llm.Message{
136 Role: llm.MessageRoleUser,
137 Content: []llm.Content{
138 llm.StringContent("<pwd>\n" + wd + "\n</pwd>"),
139 llm.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"),
140 llm.StringContent("<query>\n" + input.Query + "\n</query>"),
Earl Lee2e463fb2025-04-17 11:22:22 -0700141 },
142 }
143
144 resp, err := convo.SendMessage(initialMessage)
145 if err != nil {
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700146 return llm.ErrorfToolOut("failed to send relevance filtering message: %w", err)
Earl Lee2e463fb2025-04-17 11:22:22 -0700147 }
148 if len(resp.Content) != 1 {
Josh Bleecher Snyderddd22da2025-07-22 20:14:50 -0700149 return llm.ErrorfToolOut("unexpected number of messages (%d) in relevance filtering response: %v", len(resp.Content), resp.Content)
Earl Lee2e463fb2025-04-17 11:22:22 -0700150 }
151
152 filtered := resp.Content[0].Text
153
154 slog.InfoContext(ctx, "keyword search results processed",
155 "bytes", len(out),
156 "lines", strings.Count(out, "\n"),
157 "files", strings.Count(out, "\n\n"),
158 "query", input.Query,
159 "filtered", filtered,
160 )
161
Josh Bleecher Snyder43b60b92025-07-21 14:57:10 -0700162 return llm.ToolOut{LLMContent: llm.TextContent(resp.Content[0].Text)}
Earl Lee2e463fb2025-04-17 11:22:22 -0700163}
164
// ripgrep runs rg in directory wd with one -e pattern per entry in terms and
// returns its combined stdout and stderr.
//
// The search is invoked with "-C 10 -i --line-number --with-filename". The
// command is bound to ctx via exec.CommandContext.
//
// An exit status of 1 is not treated as a failure: the literal string
// "no matches found" is returned with a nil error. Any other failure yields an
// error that wraps the underlying one (so errors.Is / errors.As work on it) and
// includes whatever output rg produced.
func ripgrep(ctx context.Context, wd string, terms []string) (string, error) {
	args := make([]string, 0, 5+2*len(terms))
	args = append(args, "-C", "10", "-i", "--line-number", "--with-filename")
	for _, term := range terms {
		// -e keeps a term that starts with '-' from being parsed as a flag.
		args = append(args, "-e", term)
	}

	cmd := exec.CommandContext(ctx, "rg", args...)
	cmd.Dir = wd
	out, err := cmd.CombinedOutput()
	if err != nil {
		// ripgrep returns exit code 1 when no matches are found, which is not an error for us
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
			return "no matches found", nil
		}
		return "", fmt.Errorf("search failed: %w\n%s", err, out)
	}
	return string(out), nil
}
182}