llm and everything: Update ToolResult to use []Content instead of string for multimodal support
This was a journey. The sketch-generated summary below is acceptable,
but I want to tell you about it in my voice too. The goal was to send
screenshots to Claude, so that it could... look at them. Currently
the take screenshot and read screenshot tools are different, and they'll
need to be renamed/prompt-engineered a bit, but that's all fine.
The miserable part was that we had to change the return value
of a tool from string to []Content, and this crosses several layers:
- llm.Tool
- llm.Content
- ant.Content & openai and gemini friends
- AgentMessage [we left this alone]
Extra fun is that Claude's API for sending images has nested Content
fields, and an empty string and a missing value need to be distinguished
for the Text field (because lots of shell commands return the empty string!).
For the UI, I made us transform the results into a string, dropping
images. Carrying images through to the UI would have been yet more
churn for not much obvious benefit. Plus, it was going to break
skaband's compatibility, and ... yet more work.
OpenAI and Gemini don't obviously support images in this same way,
so they just don't get the tools.
~~~~~~~~~~ Sketch said:
This architectural change transforms tool results from plain strings to []Content arrays, enabling multimodal interaction in the system. Key changes include:
- Core structural changes:
  - Modified ToolResult type from string to []Content across all packages
  - Added MediaType field to Content struct for MIME type support
  - Created TextContent and ImageContent helper functions
  - Updated all tool.Run implementations to return []Content
- Image handling:
  - Implemented base64 image support in Anthropic adapter
  - Added proper media type detection and content formatting
  - Created browser_read_image tool for displaying screenshots
  - Updated browser_screenshot to provide usable image paths
- Adapter improvements:
  - Updated all LLM adapters (ANT, OAI, GEM) to handle content arrays
  - Added specialized image content handling in the Anthropic adapter
  - Ensured proper JSON serialization/deserialization for all content types
  - Improved test coverage for content arrays
- UI enhancements:
  - Added omitempty tags to reduce JSON response size
  - Updated TypeScript types to handle array content
  - Made field naming consistent (tool_error vs is_error)
  - Preserved backward compatibility for existing consumers
Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s1a2b3c4d5e6f7g8h
diff --git a/claudetool/codereview/codereview_test.go b/claudetool/codereview/codereview_test.go
index aa7b8f1..4872168 100644
--- a/claudetool/codereview/codereview_test.go
+++ b/claudetool/codereview/codereview_test.go
@@ -246,7 +246,11 @@
}
// Normalize paths in the result
- normalized := normalizePaths(result, dir)
+ resultStr := ""
+ if len(result) > 0 {
+ resultStr = result[0].Text
+ }
+ normalized := normalizePaths(resultStr, dir)
return normalized, nil
}
diff --git a/claudetool/codereview/differential.go b/claudetool/codereview/differential.go
index f358594..37728d4 100644
--- a/claudetool/codereview/differential.go
+++ b/claudetool/codereview/differential.go
@@ -37,27 +37,27 @@
return spec
}
-func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) (string, error) {
+func (r *CodeReviewer) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
// NOTE: If you add or modify error messages here, update the corresponding UI parsing in:
// webui/src/web-components/sketch-tool-card.ts (SketchToolCardCodeReview.getStatusIcon)
if err := r.RequireNormalGitState(ctx); err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for normal git state", "err", err)
- return "", err
+ return nil, err
}
if err := r.RequireNoUncommittedChanges(ctx); err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to check for uncommitted changes", "err", err)
- return "", err
+ return nil, err
}
// Check that the current commit is not the initial commit
currentCommit, err := r.CurrentCommit(ctx)
if err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to get current commit", "err", err)
- return "", err
+ return nil, err
}
if r.IsInitialCommit(currentCommit) {
slog.DebugContext(ctx, "CodeReviewer.Run: current commit is initial commit, nothing to review")
- return "", fmt.Errorf("no new commits have been added, nothing to review")
+ return nil, fmt.Errorf("no new commits have been added, nothing to review")
}
// No matter what failures happen from here out, we will declare this to have been reviewed.
@@ -67,7 +67,7 @@
changedFiles, err := r.changedFiles(ctx, r.initialCommit, currentCommit)
if err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to get changed files", "err", err)
- return "", err
+ return nil, err
}
// Prepare to analyze before/after for the impacted files.
@@ -79,7 +79,7 @@
if err != nil {
// TODO: log and skip to stuff that doesn't require packages
slog.DebugContext(ctx, "CodeReviewer.Run: failed to get packages for files", "err", err)
- return "", err
+ return nil, err
}
allPkgList := slices.Collect(maps.Keys(allPkgs))
@@ -101,7 +101,7 @@
testMsg, err := r.checkTests(ctx, allPkgList)
if err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to check tests", "err", err)
- return "", err
+ return nil, err
}
if testMsg != "" {
errorMessages = append(errorMessages, testMsg)
@@ -110,7 +110,7 @@
goplsMsg, err := r.checkGopls(ctx, changedFiles) // includes vet checks
if err != nil {
slog.DebugContext(ctx, "CodeReviewer.Run: failed to check gopls", "err", err)
- return "", err
+ return nil, err
}
if goplsMsg != "" {
errorMessages = append(errorMessages, goplsMsg)
@@ -143,7 +143,7 @@
if buf.Len() == 0 {
buf.WriteString("OK")
}
- return buf.String(), nil
+ return llm.TextContent(buf.String()), nil
}
func (r *CodeReviewer) initializeInitialCommitWorktree(ctx context.Context) error {