llm: rename -dump-ant-calls to -dump-llm

Also move the dump helper from llm/ant into package llm, include the request
URL in request dumps, switch dump filenames to a type-prefixed scheme, and
extend dumping to the Gemini and OpenAI backends.

Co-Authored-By: sketch <hello@sketch.dev>
Change-ID: s1605a29b744ae5f1k
diff --git a/cmd/sketch/main.go b/cmd/sketch/main.go
index 7bfcf38..938a784 100644
--- a/cmd/sketch/main.go
+++ b/cmd/sketch/main.go
@@ -294,8 +294,8 @@
 	bashSlowTimeout       string
 	bashBackgroundTimeout string
 	passthroughUpstream   bool
-	// Claude debugging
-	dumpAntCalls bool
+	// LLM debugging
+	dumpLLM bool
 }
 
 // parseCLIFlags parses all command-line flags and returns a CLIFlags struct
@@ -371,7 +371,7 @@
 	// Internal flags for development/debugging
 	internalFlags.StringVar(&flags.dumpDist, "dump-dist", "", "(internal) dump embedded /dist/ filesystem to specified directory and exit")
 	internalFlags.StringVar(&flags.subtraceToken, "subtrace-token", "", "(development) run sketch under subtrace.dev with the provided token")
-	internalFlags.BoolVar(&flags.dumpAntCalls, "dump-ant-calls", false, "(debugging) dump raw communications with Claude to files in ~/.cache/sketch/")
+	internalFlags.BoolVar(&flags.dumpLLM, "dump-llm", false, "(debugging) dump raw communications with LLM services to files in ~/.cache/sketch/")
 
 	// Custom usage function that shows only user-visible flags by default
 	userFlags.Usage = func() {
@@ -521,7 +521,7 @@
 		SubtraceToken:       flags.subtraceToken,
 		MCPServers:          flags.mcpServers,
 		PassthroughUpstream: flags.passthroughUpstream,
-		DumpAntCalls:        flags.dumpAntCalls,
+		DumpLLM:             flags.dumpLLM,
 	}
 
 	if err := dockerimg.LaunchContainer(ctx, config); err != nil {
@@ -905,10 +905,10 @@
 			return nil, fmt.Errorf("missing ANTHROPIC_API_KEY")
 		}
 		return &ant.Service{
-			HTTPC:        client,
-			URL:          modelURL,
-			APIKey:       apiKey,
-			DumpAntCalls: flags.dumpAntCalls,
+			HTTPC:   client,
+			URL:     modelURL,
+			APIKey:  apiKey,
+			DumpLLM: flags.dumpLLM,
 		}, nil
 	}
 
@@ -917,10 +917,11 @@
 			return nil, fmt.Errorf("missing %s", gem.GeminiAPIKeyEnv)
 		}
 		return &gem.Service{
-			HTTPC:  client,
-			URL:    modelURL,
-			Model:  gem.DefaultModel,
-			APIKey: apiKey,
+			HTTPC:   client,
+			URL:     modelURL,
+			Model:   gem.DefaultModel,
+			APIKey:  apiKey,
+			DumpLLM: flags.dumpLLM,
 		}, nil
 	}
 
@@ -936,9 +937,10 @@
 	}
 
 	return &oai.Service{
-		HTTPC:  client,
-		Model:  *model,
-		APIKey: apiKey,
+		HTTPC:   client,
+		Model:   *model,
+		APIKey:  apiKey,
+		DumpLLM: flags.dumpLLM,
 	}, nil
 }
 
diff --git a/dockerimg/dockerimg.go b/dockerimg/dockerimg.go
index d545a68..255e021 100644
--- a/dockerimg/dockerimg.go
+++ b/dockerimg/dockerimg.go
@@ -140,8 +140,8 @@
 	// PassthroughUpstream configures upstream remote for passthrough to innie
 	PassthroughUpstream bool
 
-	// DumpAntCalls enables dumping raw communications with Claude to files
-	DumpAntCalls bool
+	// DumpLLM requests dumping of raw communications with LLM services to files
+	DumpLLM bool
 }
 
 // LaunchContainer creates a docker container for a project, installs sketch and opens a connection to it.
@@ -648,8 +648,8 @@
 	if config.PassthroughUpstream {
 		cmdArgs = append(cmdArgs, "-passthrough-upstream")
 	}
-	if config.DumpAntCalls {
-		cmdArgs = append(cmdArgs, "-dump-ant-calls")
+	if config.DumpLLM {
+		cmdArgs = append(cmdArgs, "-dump-llm")
 	}
 
 	// Add additional docker arguments if provided
diff --git a/llm/ant/ant.go b/llm/ant/ant.go
index 0b65d48..212c090 100644
--- a/llm/ant/ant.go
+++ b/llm/ant/ant.go
@@ -11,8 +11,6 @@
 	"log/slog"
 	"math/rand/v2"
 	"net/http"
-	"os"
-	"path/filepath"
 	"strings"
 	"testing"
 	"time"
@@ -59,12 +57,12 @@
 // Service provides Claude completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC        *http.Client // defaults to http.DefaultClient if nil
-	URL          string       // defaults to DefaultURL if empty
-	APIKey       string       // must be non-empty
-	Model        string       // defaults to DefaultModel if empty
-	MaxTokens    int          // defaults to DefaultMaxTokens if zero
-	DumpAntCalls bool         // whether to dump request/response text to files for debugging; defaults to false
+	HTTPC     *http.Client // defaults to http.DefaultClient if nil
+	URL       string       // defaults to DefaultURL if empty
+	APIKey    string       // must be non-empty
+	Model     string       // defaults to DefaultModel if empty
+	MaxTokens int          // defaults to DefaultMaxTokens if zero
+	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -210,23 +208,6 @@
 	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
 }
 
-// dumpToFile writes the content to a timestamped file in ~/.cache/sketch/, with typ in the filename.
-func dumpToFile(typ string, content []byte) error {
-	homeDir, err := os.UserHomeDir()
-	if err != nil {
-		return err
-	}
-	cacheDir := filepath.Join(homeDir, ".cache", "sketch")
-	err = os.MkdirAll(cacheDir, 0o700)
-	if err != nil {
-		return err
-	}
-	now := time.Now()
-	filename := fmt.Sprintf("%d_%s.txt", now.UnixMilli(), typ)
-	filePath := filepath.Join(cacheDir, filename)
-	return os.WriteFile(filePath, content, 0o600)
-}
-
 func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
 	out := make([]T, len(s))
 	for i, v := range s {
@@ -440,7 +421,7 @@
 
 	var payload []byte
 	var err error
-	if s.DumpAntCalls || testing.Testing() {
+	if s.DumpLLM || testing.Testing() {
 		payload, err = json.MarshalIndent(request, "", " ")
 	} else {
 		payload, err = json.Marshal(request)
@@ -472,8 +453,8 @@
 			slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
 			time.Sleep(sleep)
 		}
-		if s.DumpAntCalls {
-			if err := dumpToFile("request", payload); err != nil {
+		if s.DumpLLM {
+			if err := llm.DumpToFile("request", url, payload); err != nil {
 				slog.WarnContext(ctx, "failed to dump request to file", "error", err)
 			}
 		}
@@ -516,8 +497,8 @@
 
 		switch {
 		case resp.StatusCode == http.StatusOK:
-			if s.DumpAntCalls {
-				if err := dumpToFile("response", buf); err != nil {
+			if s.DumpLLM {
+				if err := llm.DumpToFile("response", "", buf); err != nil {
 					slog.WarnContext(ctx, "failed to dump response to file", "error", err)
 				}
 			}
diff --git a/llm/gem/gem.go b/llm/gem/gem.go
index 1621f2d..358770c 100644
--- a/llm/gem/gem.go
+++ b/llm/gem/gem.go
@@ -23,10 +23,11 @@
 // Service provides Gemini completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC  *http.Client // defaults to http.DefaultClient if nil
-	URL    string       // Gemini API URL, uses the gemini package default if empty
-	APIKey string       // must be non-empty
-	Model  string       // defaults to DefaultModel if empty
+	HTTPC   *http.Client // defaults to http.DefaultClient if nil
+	URL     string       // Gemini API URL, uses the gemini package default if empty
+	APIKey  string       // must be non-empty
+	Model   string       // defaults to DefaultModel if empty
+	DumpLLM bool         // whether to dump request/response text to files for debugging; defaults to false
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -513,6 +514,14 @@
 	// Log the structured Gemini request for debugging
 	if reqJSON, err := json.MarshalIndent(gemReq, "", "  "); err == nil {
 		slog.DebugContext(ctx, "gemini_request_json", "request", string(reqJSON))
+		if s.DumpLLM {
+			// Construct the same URL that the Gemini client will use
+			endpoint := cmp.Or(s.URL, "https://generativelanguage.googleapis.com/v1beta")
+			url := fmt.Sprintf("%s/models/%s:generateContent", endpoint, cmp.Or(s.Model, DefaultModel))
+			if err := llm.DumpToFile("request", url, reqJSON); err != nil {
+				slog.WarnContext(ctx, "failed to dump gemini request to file", "error", err)
+			}
+		}
 	}
 
 	// Create a Gemini model instance
@@ -540,6 +549,11 @@
 			// Log the structured Gemini response
 			if resJSON, err := json.MarshalIndent(gemRes, "", "  "); err == nil {
 				slog.DebugContext(ctx, "gemini_response_json", "response", string(resJSON))
+				if s.DumpLLM {
+					if err := llm.DumpToFile("response", "", resJSON); err != nil {
+						slog.WarnContext(ctx, "failed to dump gemini response to file", "error", err)
+					}
+				}
 			}
 			break
 		}
diff --git a/llm/llm.go b/llm/llm.go
index 2c1011e..638a6a3 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -7,6 +7,8 @@
 	"fmt"
 	"log/slog"
 	"net/http"
+	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
@@ -294,3 +296,30 @@
 func ErrorfToolOut(format string, args ...any) ToolOut {
 	return ErrorToolOut(fmt.Errorf(format, args...))
 }
+
+// DumpToFile writes LLM communication content to a timestamped file in ~/.cache/sketch/.
+// If url is non-empty it is written first, separated from content by a blank line.
+// The typ parameter ("request", "response") is used as the filename prefix.
+func DumpToFile(typ string, url string, content []byte) error {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+	cacheDir := filepath.Join(homeDir, ".cache", "sketch")
+	err = os.MkdirAll(cacheDir, 0o700)
+	if err != nil {
+		return err
+	}
+	now := time.Now()
+	filename := fmt.Sprintf("%s_%d.txt", typ, now.UnixMilli())
+	filePath := filepath.Join(cacheDir, filename)
+
+	// For requests, start with the URL; for responses, just write the content
+	data := []byte(url)
+	if url != "" {
+		data = append(data, "\n\n"...)
+	}
+	data = append(data, content...)
+
+	return os.WriteFile(filePath, data, 0o600)
+}
diff --git a/llm/oai/oai.go b/llm/oai/oai.go
index 28fbb04..e104c7f 100644
--- a/llm/oai/oai.go
+++ b/llm/oai/oai.go
@@ -218,6 +218,7 @@
 	Model     Model        // defaults to DefaultModel if zero value
 	MaxTokens int          // defaults to DefaultMaxTokens if zero
 	Org       string       // optional - organization ID
+	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -661,11 +662,17 @@
 	} else {
 		req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
 	}
-	// fmt.Printf("Sending request to OpenAI\n")
-	// enc := json.NewEncoder(os.Stdout)
-	// enc.SetIndent("", "  ")
-	// enc.Encode(req)
-	// fmt.Printf("\n")
+	// Dump request if enabled
+	if s.DumpLLM {
+		if reqJSON, err := json.MarshalIndent(req, "", "  "); err == nil {
+			// Construct the chat completions URL
+			baseURL := cmp.Or(model.URL, OpenAIURL)
+			url := baseURL + "/chat/completions"
+			if err := llm.DumpToFile("request", url, reqJSON); err != nil {
+				slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
+			}
+		}
+	}
 
 	// Retry mechanism
 	backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
@@ -686,6 +693,14 @@
 
 		// Handle successful response
 		if err == nil {
+			// Dump response if enabled
+			if s.DumpLLM {
+				if respJSON, jsonErr := json.MarshalIndent(resp, "", "  "); jsonErr == nil {
+					if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
+						slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
+					}
+				}
+			}
 			return s.toLLMResponse(&resp), nil
 		}