blob: f16338f754b784d14ef13c7cac79985802bb3f7b [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package claudetool
2
3import (
4 "context"
5 _ "embed"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "os/exec"
10 "strings"
11
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070012 "sketch.dev/llm"
13 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070014)
15
16// The Keyword tool provides keyword search.
17// TODO: use an embedding model + re-ranker or otherwise do something nicer than this kludge.
18// TODO: if we can get this fast enough, do it on the fly while the user is typing their prompt.
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070019var Keyword = &llm.Tool{
Earl Lee2e463fb2025-04-17 11:22:22 -070020 Name: keywordName,
21 Description: keywordDescription,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070022 InputSchema: llm.MustSchema(keywordInputSchema),
Earl Lee2e463fb2025-04-17 11:22:22 -070023 Run: keywordRun,
24}
25
const (
	// keywordName is the name under which the tool is registered.
	keywordName = "keyword_search"

	// keywordDescription is the usage guidance attached to the tool definition.
	keywordDescription = `
keyword_search locates files with a search-and-filter approach.
Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions.

Effective use:
- Provide a detailed query for accurate relevance ranking
- Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall)
- Order search terms by importance (most important first)
- Supports regex search terms for flexible matching

IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, filenames, symbols, or package names. Use direct approaches (grep, cat, go doc, etc.) instead.
`

	// keywordInputSchema is the JSON schema for the tool input; keywordInput
	// is its Go counterpart.
	//
	// If you modify this, update the termui template for prettier rendering.
	keywordInputSchema = `
{
  "type": "object",
  "required": [
    "query",
    "search_terms"
  ],
  "properties": {
    "query": {
      "type": "string",
      "description": "A detailed statement of what you're trying to find or learn."
    },
    "search_terms": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of search terms in descending order of importance."
    }
  }
}
`
)
65
// keywordInput is the decoded form of the JSON payload described by
// keywordInputSchema.
type keywordInput struct {
	// Query is the free-text statement of what the caller wants to find.
	Query string `json:"query"`
	// SearchTerms lists the terms to search for, most important first.
	SearchTerms []string `json:"search_terms"`
}
70
71//go:embed keyword_system_prompt.txt
72var keywordSystemPrompt string
73
// FindRepoRoot asks git for the top-level directory of the repository that
// contains wd, by running "git rev-parse --show-toplevel" with wd as the
// command's working directory.
//
// On success it returns git's output with surrounding whitespace trimmed.
// If the command cannot be run or exits unsuccessfully, it returns an empty
// string and a wrapped error.
func FindRepoRoot(wd string) (string, error) {
	revParse := exec.Command("git", "rev-parse", "--show-toplevel")
	revParse.Dir = wd
	// todo: cwd here and throughout
	stdout, err := revParse.Output()
	if err != nil {
		return "", fmt.Errorf("failed to find git repository root: %w", err)
	}
	root := strings.TrimSpace(string(stdout))
	return root, nil
}
85
86func keywordRun(ctx context.Context, m json.RawMessage) (string, error) {
87 var input keywordInput
88 if err := json.Unmarshal(m, &input); err != nil {
89 return "", err
90 }
91 wd := WorkingDir(ctx)
Josh Bleecher Snyderf4047bb2025-05-05 23:02:56 +000092 root, err := FindRepoRoot(wd)
Earl Lee2e463fb2025-04-17 11:22:22 -070093 if err == nil {
94 wd = root
95 }
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -070096 slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd)
Earl Lee2e463fb2025-04-17 11:22:22 -070097
98 // first remove stopwords
99 var keep []string
Josh Bleecher Snyder250348e2025-04-30 10:31:28 -0700100 for _, term := range input.SearchTerms {
Earl Lee2e463fb2025-04-17 11:22:22 -0700101 out, err := ripgrep(ctx, wd, []string{term})
102 if err != nil {
103 return "", err
104 }
105 if len(out) > 64*1024 {
106 slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out))
107 continue
108 }
109 keep = append(keep, term)
110 }
111
112 // peel off keywords until we get a result that fits in the query window
113 var out string
114 for {
115 var err error
116 out, err = ripgrep(ctx, wd, keep)
117 if err != nil {
118 return "", err
119 }
120 if len(out) < 128*1024 {
121 break
122 }
123 keep = keep[:len(keep)-1]
124 }
125
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700126 info := conversation.ToolCallInfoFromContext(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700127 convo := info.Convo.SubConvo()
128 convo.SystemPrompt = strings.TrimSpace(keywordSystemPrompt)
Josh Bleecher Snyder593ca642025-05-07 05:19:32 -0700129 convo.PromptCaching = false
Earl Lee2e463fb2025-04-17 11:22:22 -0700130
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700131 initialMessage := llm.Message{
132 Role: llm.MessageRoleUser,
133 Content: []llm.Content{
134 llm.StringContent("<pwd>\n" + wd + "\n</pwd>"),
135 llm.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"),
136 llm.StringContent("<query>\n" + input.Query + "\n</query>"),
Earl Lee2e463fb2025-04-17 11:22:22 -0700137 },
138 }
139
140 resp, err := convo.SendMessage(initialMessage)
141 if err != nil {
142 return "", fmt.Errorf("failed to send relevance filtering message: %w", err)
143 }
144 if len(resp.Content) != 1 {
145 return "", fmt.Errorf("unexpected number of messages in relevance filtering response: %d", len(resp.Content))
146 }
147
148 filtered := resp.Content[0].Text
149
150 slog.InfoContext(ctx, "keyword search results processed",
151 "bytes", len(out),
152 "lines", strings.Count(out, "\n"),
153 "files", strings.Count(out, "\n\n"),
154 "query", input.Query,
155 "filtered", filtered,
156 )
157
158 return resp.Content[0].Text, nil
159}
160
// ripgrep runs rg with wd as its working directory and returns its combined
// standard output and standard error.
//
// Each entry of terms is passed as its own -e pattern. The search is
// case-insensitive (-i) and reports file names, line numbers and 10 lines of
// context around each match (-C 10).
//
// When rg exits with status 1, which it uses for "no matches", the string
// "no matches found" is returned with a nil error. An empty terms list and
// any other rg failure produce an error; for rg failures the error wraps the
// underlying one and includes whatever rg printed.
func ripgrep(ctx context.Context, wd string, terms []string) (string, error) {
	if len(terms) == 0 {
		// With no -e flag rg has no pattern to search for and would exit
		// with a usage error; report the real cause instead.
		return "", fmt.Errorf("search failed: no search terms provided")
	}

	// Five fixed flags plus "-e <term>" for every term.
	args := make([]string, 0, 5+2*len(terms))
	args = append(args, "-C", "10", "-i", "--line-number", "--with-filename")
	for _, term := range terms {
		args = append(args, "-e", term)
	}

	cmd := exec.CommandContext(ctx, "rg", args...)
	cmd.Dir = wd
	out, err := cmd.CombinedOutput()
	if err != nil {
		// ripgrep returns exit code 1 when no matches are found, which is not an error for us
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
			return "no matches found", nil
		}
		return "", fmt.Errorf("search failed: %w\n%s", err, out)
	}
	return string(out), nil
}