llm and everything: Update ToolResult to use []Content instead of string for multimodal support

This was a journey. The sketch-generated summary below is acceptable, but I want to tell you about it in my voice too.

The goal was to send screenshots to Claude, so that it could... look at them. Currently the take screenshot and read screenshot tools are different, and they'll need to be renamed/prompt-engineered a bit, but that's all fine.

The miserable part was that we had to change the return value of a tool from string to []Content, and this crosses several layers:

- llm.Tool
- llm.Content
- ant.Content & openai and gemini friends
- AgentMessage [we left this alone]

Extra fun is that Claude's API for sending images has nested Content fields, and an empty string and a missing value need to be distinguished for the Text field (because lots of shell commands return the empty string!).

For the UI, I made us transform the results into a string, dropping images. Doing otherwise would have been yet more churn for not much obvious benefit. Plus, it was going to break skaband's compatibility, and ... yet more work.

OpenAI and Gemini don't obviously support images in this same way, so they just don't get the tools.

~~~~~~~~~~

Sketch said:

This architectural change transforms tool results from plain strings to []Content arrays, enabling multimodal interaction in the system.
Key changes include:

- Core structural changes:
  - Modified ToolResult type from string to []Content across all packages
  - Added MediaType field to Content struct for MIME type support
  - Created TextContent and ImageContent helper functions
  - Updated all tool.Run implementations to return []Content
- Image handling:
  - Implemented base64 image support in Anthropic adapter
  - Added proper media type detection and content formatting
  - Created browser_read_image tool for displaying screenshots
  - Updated browser_screenshot to provide usable image paths
- Adapter improvements:
  - Updated all LLM adapters (ANT, OAI, GEM) to handle content arrays
  - Added specialized image content handling in the Anthropic adapter
  - Ensured proper JSON serialization/deserialization for all content types
  - Improved test coverage for content arrays
- UI enhancements:
  - Added omitempty tags to reduce JSON response size
  - Updated TypeScript types to handle array content
  - Made field naming consistent (tool_error vs is_error)
  - Preserved backward compatibility for existing consumers

Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s1a2b3c4d5e6f7g8h

commit: 72252cbcb97840d724133be67c4c69cc69ebb2d3 [log] [tgz]
author: Philip Zeyliger <philip@bold.dev> Sat May 10 17:00:08 2025 -0700
committer: Philip Zeyliger <philip@bold.dev> Sat May 10 17:00:08 2025 -0700
tree: a361499dc3fa6b9af2be3e74cfd59fd8ba34690e
parent: 7ce5fb76d8748ebf73c5adf9d6cd8eb67716fba8 [diff] [blame]
diff --git a/claudetool/bash.go b/claudetool/bash.go
index 4684d76..7dec267 100644
--- a/claudetool/bash.go
+++ b/claudetool/bash.go

@@ -102,22 +102,22 @@
 	}
 }
 
-func (b *BashTool) Run(ctx context.Context, m json.RawMessage) (string, error) {
+func (b *BashTool) Run(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
 	var req bashInput
 	if err := json.Unmarshal(m, &req); err != nil {
-		return "", fmt.Errorf("failed to unmarshal bash command input: %w", err)
+		return nil, fmt.Errorf("failed to unmarshal bash command input: %w", err)
 	}
 
 	// do a quick permissions check (NOT a security barrier)
 	err := bashkit.Check(req.Command)
 	if err != nil {
-		return "", err
+		return nil, err
 	}
 
 	// Custom permission callback if set
 	if b.CheckPermission != nil {
 		if err := b.CheckPermission(req.Command); err != nil {
-			return "", err
+			return nil, err
 		}
 	}
 
@@ -125,23 +125,23 @@
 	if req.Background {
 		result, err := executeBackgroundBash(ctx, req)
 		if err != nil {
-			return "", err
+			return nil, err
 		}
 		// Marshal the result to JSON
 		// TODO: emit XML(-ish) instead?
 		output, err := json.Marshal(result)
 		if err != nil {
-			return "", fmt.Errorf("failed to marshal background result: %w", err)
+			return nil, fmt.Errorf("failed to marshal background result: %w", err)
 		}
-		return string(output), nil
+		return llm.TextContent(string(output)), nil
 	}
 
 	// For foreground commands, use executeBash
 	out, execErr := executeBash(ctx, req)
-	if execErr == nil {
-		return out, nil
+	if execErr != nil {
+		return nil, execErr
 	}
-	return "", execErr
+	return llm.TextContent(out), nil
 }
 
 const maxBashOutputLength = 131072
@@ -300,7 +300,7 @@
 }
 
 // BashRun is the legacy function for testing compatibility
-func BashRun(ctx context.Context, m json.RawMessage) (string, error) {
+func BashRun(ctx context.Context, m json.RawMessage) ([]llm.Content, error) {
 	// Use the default Bash tool which has no permission callback
 	return Bash.Run(ctx, m)
 }
commit	72252cbcb97840d724133be67c4c69cc69ebb2d3	[log] [tgz]
author	Philip Zeyliger <philip@bold.dev>	Sat May 10 17:00:08 2025 -0700
committer	Philip Zeyliger <philip@bold.dev>	Sat May 10 17:00:08 2025 -0700
tree	a361499dc3fa6b9af2be3e74cfd59fd8ba34690e
parent	7ce5fb76d8748ebf73c5adf9d6cd8eb67716fba8 [diff] [blame]