| package claudetool |
| |
| import ( |
| "context" |
| _ "embed" |
| "encoding/json" |
| "fmt" |
| "log/slog" |
| "os/exec" |
| "strings" |
| |
| "sketch.dev/llm" |
| "sketch.dev/llm/conversation" |
| ) |
| |
| // The Keyword tool provides keyword search. |
| // TODO: use an embedding model + re-ranker or otherwise do something nicer than this kludge. |
| // TODO: if we can get this fast enough, do it on the fly while the user is typing their prompt. |
| var Keyword = &llm.Tool{ |
| Name: keywordName, |
| Description: keywordDescription, |
| InputSchema: llm.MustSchema(keywordInputSchema), |
| Run: keywordRun, |
| } |
| |
| const ( |
| keywordName = "keyword_search" |
| keywordDescription = ` |
| keyword_search locates files with a search-and-filter approach. |
| Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions. |
| |
| Effective use: |
| - Provide a detailed query for accurate relevance ranking |
| - Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall) |
| - Order search terms by importance (most important first) |
| - Supports regex search terms for flexible matching |
| |
| IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, stack traces, filenames, or symbols. Use direct approaches (rg, cat, etc.) instead. |
| ` |
| |
| // If you modify this, update the termui template for prettier rendering. |
| keywordInputSchema = ` |
| { |
| "type": "object", |
| "required": [ |
| "query", |
| "search_terms" |
| ], |
| "properties": { |
| "query": { |
| "type": "string", |
| "description": "A detailed statement of what you're trying to find or learn." |
| }, |
| "search_terms": { |
| "type": "array", |
| "items": { |
| "type": "string" |
| }, |
| "description": "List of search terms in descending order of importance." |
| } |
| } |
| } |
| ` |
| ) |
| |
| type keywordInput struct { |
| Query string `json:"query"` |
| SearchTerms []string `json:"search_terms"` |
| } |
| |
| //go:embed keyword_system_prompt.txt |
| var keywordSystemPrompt string |
| |
| // FindRepoRoot attempts to find the git repository root from the current directory |
| func FindRepoRoot(wd string) (string, error) { |
| cmd := exec.Command("git", "rev-parse", "--show-toplevel") |
| cmd.Dir = wd |
| out, err := cmd.Output() |
| // todo: cwd here and throughout |
| if err != nil { |
| return "", fmt.Errorf("failed to find git repository root: %w", err) |
| } |
| return strings.TrimSpace(string(out)), nil |
| } |
| |
| func keywordRun(ctx context.Context, m json.RawMessage) llm.ToolOut { |
| var input keywordInput |
| if err := json.Unmarshal(m, &input); err != nil { |
| return llm.ErrorToolOut(err) |
| } |
| wd := WorkingDir(ctx) |
| root, err := FindRepoRoot(wd) |
| if err == nil { |
| wd = root |
| } |
| slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd) |
| |
| // first remove stopwords |
| var keep []string |
| for _, term := range input.SearchTerms { |
| out, err := ripgrep(ctx, wd, []string{term}) |
| if err != nil { |
| return llm.ErrorToolOut(err) |
| } |
| if len(out) > 64*1024 { |
| slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out)) |
| continue |
| } |
| keep = append(keep, term) |
| } |
| |
| if len(keep) == 0 { |
| return llm.ToolOut{LLMContent: llm.TextContent("each of those search terms yielded too many results")} |
| } |
| |
| // peel off keywords until we get a result that fits in the query window |
| var out string |
| for { |
| var err error |
| out, err = ripgrep(ctx, wd, keep) |
| if err != nil { |
| return llm.ErrorToolOut(err) |
| } |
| if len(out) < 128*1024 { |
| break |
| } |
| keep = keep[:len(keep)-1] |
| } |
| |
| info := conversation.ToolCallInfoFromContext(ctx) |
| convo := info.Convo.SubConvo() |
| convo.SystemPrompt = strings.TrimSpace(keywordSystemPrompt) |
| convo.PromptCaching = false |
| |
| initialMessage := llm.Message{ |
| Role: llm.MessageRoleUser, |
| Content: []llm.Content{ |
| llm.StringContent("<pwd>\n" + wd + "\n</pwd>"), |
| llm.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"), |
| llm.StringContent("<query>\n" + input.Query + "\n</query>"), |
| }, |
| } |
| |
| resp, err := convo.SendMessage(initialMessage) |
| if err != nil { |
| return llm.ErrorfToolOut("failed to send relevance filtering message: %w", err) |
| } |
| if len(resp.Content) != 1 { |
| return llm.ErrorfToolOut("unexpected number of messages in relevance filtering response: %d", len(resp.Content)) |
| } |
| |
| filtered := resp.Content[0].Text |
| |
| slog.InfoContext(ctx, "keyword search results processed", |
| "bytes", len(out), |
| "lines", strings.Count(out, "\n"), |
| "files", strings.Count(out, "\n\n"), |
| "query", input.Query, |
| "filtered", filtered, |
| ) |
| |
| return llm.ToolOut{LLMContent: llm.TextContent(resp.Content[0].Text)} |
| } |
| |
| func ripgrep(ctx context.Context, wd string, terms []string) (string, error) { |
| args := []string{"-C", "10", "-i", "--line-number", "--with-filename"} |
| for _, term := range terms { |
| args = append(args, "-e", term) |
| } |
| cmd := exec.CommandContext(ctx, "rg", args...) |
| cmd.Dir = wd |
| out, err := cmd.CombinedOutput() |
| if err != nil { |
| // ripgrep returns exit code 1 when no matches are found, which is not an error for us |
| if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 { |
| return "no matches found", nil |
| } |
| return "", fmt.Errorf("search failed: %v\n%s", err, out) |
| } |
| outStr := string(out) |
| return outStr, nil |
| } |