all: support openai-compatible models
The support is rather minimal at this point:
Only hard-coded models, only -unsafe, only -skabandaddr="".
The "shared" LLM package is strongly Claude-flavored.
We can fix all of this and more over time, if we are inspired to.
(Maybe we'll switch to https://github.com/maruel/genai?)
The goal for now is to get the rough structure in place.
I've rebased and rebuilt this more times than I care to remember.
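For reference, a rough usage sketch (the llm.Request, llm.Message, and llm.Content
field names are inferred from the conversion code in this diff; illustrative only,
not exercised by the tests):

    package main

    import (
        "context"
        "fmt"
        "log"
        "os"

        "sketch.dev/llm"
        "sketch.dev/llm/oai"
    )

    func main() {
        m := oai.ModelByUserName("gpt4.1") // nil if the name is unknown
        if m == nil {
            log.Fatal("unknown model")
        }
        svc := &oai.Service{Model: *m, APIKey: os.Getenv(m.APIKeyEnv)}
        resp, err := svc.Do(context.Background(), &llm.Request{
            Messages: []llm.Message{{
                Role:    llm.MessageRoleUser,
                Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Say hello."}},
            }},
        })
        if err != nil {
            log.Fatal(err)
        }
        for _, c := range resp.Content {
            fmt.Println(c.Text)
        }
    }
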
diff --git a/llm/oai/oai.go b/llm/oai/oai.go
new file mode 100644
index 0000000..3e772ab
--- /dev/null
+++ b/llm/oai/oai.go
@@ -0,0 +1,592 @@
+package oai
+
+import (
+ "cmp"
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "log/slog"
+ "math/rand/v2"
+ "net/http"
+ "time"
+
+ "github.com/sashabaranov/go-openai"
+ "sketch.dev/llm"
+)
+
+const (
+ DefaultMaxTokens = 8192
+
+ OpenAIURL = "https://api.openai.com/v1"
+ FireworksURL = "https://api.fireworks.ai/inference/v1"
+ LlamaCPPURL = "http://localhost:8080/v1"
+ TogetherURL = "https://api.together.xyz/v1"
+ GeminiURL = "https://generativelanguage.googleapis.com/v1beta/openai/"
+
+ // Environment variable names for API keys
+ OpenAIAPIKeyEnv = "OPENAI_API_KEY"
+ FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
+ TogetherAPIKeyEnv = "TOGETHER_API_KEY"
+ GeminiAPIKeyEnv = "GEMINI_API_KEY"
+)
+
+type Model struct {
+ UserName string // provided by the user to identify this model (e.g. "gpt4.1")
+ ModelName string // passed to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
+ URL string
+ Cost ModelCost
+ APIKeyEnv string // environment variable name for the API key
+}
+
+type ModelCost struct {
+ Input uint64 // in cents per million tokens
+ CachedInput uint64 // in cents per million tokens
+ Output uint64 // in cents per million tokens
+}
+
+var (
+ DefaultModel = GPT41
+
+ GPT41 = Model{
+ UserName: "gpt4.1",
+ ModelName: "gpt-4.1-2025-04-14",
+ URL: OpenAIURL,
+ Cost: ModelCost{Input: 200, CachedInput: 50, Output: 800},
+ APIKeyEnv: OpenAIAPIKeyEnv,
+ }
+
+ Gemini25Flash = Model{
+ UserName: "gemini-flash-2.5",
+ ModelName: "gemini-2.5-flash-preview-04-17",
+ URL: GeminiURL,
+ Cost: ModelCost{Input: 15, Output: 60},
+ APIKeyEnv: GeminiAPIKeyEnv,
+ }
+
+ Gemini25Pro = Model{
+ UserName: "gemini-pro-2.5",
+ ModelName: "gemini-2.5-pro-preview-03-25",
+ URL: GeminiURL,
+ // Gemini 2.5 Pro pricing is tiered by prompt size:
+ // input is $1.25/M tokens for prompts <= 200k tokens, $2.50/M above that;
+ // output is $10.00/M for prompts <= 200k tokens, $15.00/M above that.
+ // Context caching is $0.31/M (<= 200k) or $0.625/M (> 200k),
+ // plus $4.50 per million tokens per hour of cache storage,
+ // and it's unclear whether our requests are cached at all.
+ // ModelCost doesn't model tiered pricing, so use the <= 200k rates.
+ Cost: ModelCost{Input: 125, Output: 1000},
+ APIKeyEnv: GeminiAPIKeyEnv,
+ }
+
+ TogetherDeepseekV3 = Model{
+ UserName: "together-deepseek-v3",
+ ModelName: "deepseek-ai/DeepSeek-V3",
+ URL: TogetherURL,
+ Cost: ModelCost{Input: 125, Output: 125},
+ APIKeyEnv: TogetherAPIKeyEnv,
+ }
+
+ TogetherLlama4Maverick = Model{
+ UserName: "together-llama4-maverick",
+ ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ URL: TogetherURL,
+ Cost: ModelCost{Input: 27, Output: 85},
+ APIKeyEnv: TogetherAPIKeyEnv,
+ }
+
+ TogetherLlama3_3_70B = Model{
+ UserName: "together-llama3-70b",
+ ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+ URL: TogetherURL,
+ Cost: ModelCost{Input: 88, Output: 88},
+ APIKeyEnv: TogetherAPIKeyEnv,
+ }
+
+ TogetherMistralSmall = Model{
+ UserName: "together-mistral-small",
+ ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
+ URL: TogetherURL,
+ Cost: ModelCost{Input: 80, Output: 80},
+ APIKeyEnv: TogetherAPIKeyEnv,
+ }
+
+ LlamaCPP = Model{
+ UserName: "llama.cpp",
+ ModelName: "llama.cpp local model",
+ URL: LlamaCPPURL,
+ // zero cost
+ Cost: ModelCost{},
+ }
+
+ FireworksDeepseekV3 = Model{
+ UserName: "fireworks-deepseek-v3",
+ ModelName: "accounts/fireworks/models/deepseek-v3-0324",
+ URL: FireworksURL,
+ Cost: ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
+ APIKeyEnv: FireworksAPIKeyEnv,
+ }
+)
+
+// Service provides chat completions.
+// Fields should not be altered concurrently with calling any method on Service.
+type Service struct {
+ HTTPC *http.Client // defaults to http.DefaultClient if nil
+ APIKey string // API key for the provider; callers typically load it from the env var named by Model.APIKeyEnv
+ Model Model // defaults to DefaultModel if zero value
+ MaxTokens int // defaults to DefaultMaxTokens if zero
+ Org string // optional - organization ID
+}
+
+var _ llm.Service = (*Service)(nil)
+
+// ModelsRegistry is a registry of all known models with their user-friendly names.
+var ModelsRegistry = []Model{
+ GPT41,
+ Gemini25Flash,
+ Gemini25Pro,
+ TogetherDeepseekV3,
+ TogetherLlama4Maverick,
+ TogetherLlama3_3_70B,
+ TogetherMistralSmall,
+ LlamaCPP,
+ FireworksDeepseekV3,
+}
+
+// ListModels returns a list of all available models with their user-friendly names.
+func ListModels() []string {
+ var names []string
+ for _, model := range ModelsRegistry {
+ if model.UserName != "" {
+ names = append(names, model.UserName)
+ }
+ }
+ return names
+}
+
+// ModelByUserName returns a model by its user-friendly name.
+// Returns nil if no model with the given name is found.
+func ModelByUserName(name string) *Model {
+ for _, model := range ModelsRegistry {
+ if model.UserName == name {
+ return &model
+ }
+ }
+ return nil
+}
+
+var (
+ fromLLMRole = map[llm.MessageRole]string{
+ llm.MessageRoleAssistant: "assistant",
+ llm.MessageRoleUser: "user",
+ }
+ fromLLMContentType = map[llm.ContentType]string{
+ llm.ContentTypeText: "text",
+ llm.ContentTypeToolUse: "function", // OpenAI uses function instead of tool_call
+ llm.ContentTypeToolResult: "tool_result",
+ llm.ContentTypeThinking: "text", // Map thinking to text since OpenAI doesn't have thinking
+ llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
+ }
+ fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
+ llm.ToolChoiceTypeAuto: "auto",
+ llm.ToolChoiceTypeAny: "required", // OpenAI uses "required" where Anthropic-style callers say "any"
+ llm.ToolChoiceTypeNone: "none",
+ llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
+ }
+ toLLMRole = map[string]llm.MessageRole{
+ "assistant": llm.MessageRoleAssistant,
+ "user": llm.MessageRoleUser,
+ }
+ toLLMStopReason = map[string]llm.StopReason{
+ "stop": llm.StopReasonStopSequence,
+ "length": llm.StopReasonMaxTokens,
+ "tool_calls": llm.StopReasonToolUse,
+ "function_call": llm.StopReasonToolUse, // Map both to ToolUse
+ "content_filter": llm.StopReasonStopSequence, // No direct equivalent
+ }
+)
+
+// fromLLMContent converts llm.Content to the format expected by OpenAI.
+func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
+ switch c.Type {
+ case llm.ContentTypeText:
+ return c.Text, nil
+ case llm.ContentTypeToolUse:
+ // For OpenAI, tool use is sent as a null content with tool_calls in the message
+ return "", []openai.ToolCall{
+ {
+ Type: openai.ToolTypeFunction,
+ ID: c.ID, // Use the content ID if provided
+ Function: openai.FunctionCall{
+ Name: c.ToolName,
+ Arguments: string(c.ToolInput),
+ },
+ },
+ }
+ case llm.ContentTypeToolResult:
+ // Tool results in OpenAI are sent as a separate message with tool_call_id
+ return c.ToolResult, nil
+ default:
+ // For thinking or other types, convert to text
+ return c.Text, nil
+ }
+}
+
+// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format
+func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
+ // For OpenAI, we need to handle tool results differently than regular messages
+ // Each tool result becomes its own message with role="tool"
+
+ var messages []openai.ChatCompletionMessage
+
+ // Check if this is a regular message or contains tool results
+ var regularContent []llm.Content
+ var toolResults []llm.Content
+
+ for _, c := range msg.Content {
+ if c.Type == llm.ContentTypeToolResult {
+ toolResults = append(toolResults, c)
+ } else {
+ regularContent = append(regularContent, c)
+ }
+ }
+
+ // Process tool results first, each as its own message with role "tool"
+ for _, tr := range toolResults {
+ m := openai.ChatCompletionMessage{
+ Role: "tool",
+ Content: cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
+ ToolCallID: tr.ToolUseID,
+ }
+ messages = append(messages, m)
+ }
+ // Process regular content second
+ if len(regularContent) > 0 {
+ m := openai.ChatCompletionMessage{
+ Role: fromLLMRole[msg.Role],
+ }
+
+ // For assistant messages that contain tool calls
+ var toolCalls []openai.ToolCall
+ var textContent string
+
+ for _, c := range regularContent {
+ content, tools := fromLLMContent(c)
+ if len(tools) > 0 {
+ toolCalls = append(toolCalls, tools...)
+ } else if content != "" {
+ if textContent != "" {
+ textContent += "\n"
+ }
+ textContent += content
+ }
+ }
+
+ m.Content = textContent
+ m.ToolCalls = toolCalls
+
+ messages = append(messages, m)
+ }
+
+ return messages
+}
+
+// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
+func fromLLMToolChoice(tc *llm.ToolChoice) any {
+ if tc == nil {
+ return nil
+ }
+
+ if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
+ return openai.ToolChoice{
+ Type: openai.ToolTypeFunction,
+ Function: openai.ToolFunction{
+ Name: tc.Name,
+ },
+ }
+ }
+
+ // For non-specific tool choice, just use the string
+ return fromLLMToolChoiceType[tc.Type]
+}
+
+// fromLLMTool converts llm.Tool to the format expected by OpenAI.
+func fromLLMTool(t *llm.Tool) openai.Tool {
+ return openai.Tool{
+ Type: openai.ToolTypeFunction,
+ Function: &openai.FunctionDefinition{
+ Name: t.Name,
+ Description: t.Description,
+ Parameters: t.InputSchema,
+ },
+ }
+}
+
+// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
+func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
+ if len(systemContent) == 0 {
+ return nil
+ }
+
+ // Combine all system content into a single system message
+ var systemText string
+ for i, content := range systemContent {
+ if i > 0 && systemText != "" && content.Text != "" {
+ systemText += "\n"
+ }
+ systemText += content.Text
+ }
+
+ if systemText == "" {
+ return nil
+ }
+
+ return []openai.ChatCompletionMessage{
+ {
+ Role: "system",
+ Content: systemText,
+ },
+ }
+}
+
+// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
+func toRawLLMContent(content string) llm.Content {
+ return llm.Content{
+ Type: llm.ContentTypeText,
+ Text: content,
+ }
+}
+
+// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
+func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
+ // Generate a content ID if needed
+ id := toolCall.ID
+ if id == "" {
+ // Create a deterministic ID based on the function name if no ID is provided
+ id = "tc_" + toolCall.Function.Name
+ }
+
+ return llm.Content{
+ ID: id,
+ Type: llm.ContentTypeToolUse,
+ ToolName: toolCall.Function.Name,
+ ToolInput: json.RawMessage(toolCall.Function.Arguments),
+ }
+}
+
+// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
+func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
+ return llm.Content{
+ Type: llm.ContentTypeToolResult,
+ ToolUseID: msg.ToolCallID,
+ ToolResult: msg.Content,
+ ToolError: false, // OpenAI doesn't specify errors explicitly
+ }
+}
+
+// toLLMContents converts message content from OpenAI to []llm.Content.
+func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
+ var contents []llm.Content
+
+ // If this is a tool response, handle it separately
+ if msg.Role == "tool" && msg.ToolCallID != "" {
+ return []llm.Content{toToolResultLLMContent(msg)}
+ }
+
+ // If there's text content, add it
+ if msg.Content != "" {
+ contents = append(contents, toRawLLMContent(msg.Content))
+ }
+
+ // If there are tool calls, add them
+ for _, tc := range msg.ToolCalls {
+ contents = append(contents, toToolCallLLMContent(tc))
+ }
+
+ // If empty, add an empty text content
+ if len(contents) == 0 {
+ contents = append(contents, llm.Content{
+ Type: llm.ContentTypeText,
+ Text: "",
+ })
+ }
+
+ return contents
+}
+
+// toLLMUsage converts usage information from OpenAI to llm.Usage.
+func (s *Service) toLLMUsage(model string, au openai.Usage) llm.Usage {
+ // fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
+ in := uint64(au.PromptTokens)
+ var inc uint64
+ if au.PromptTokensDetails != nil {
+ inc = uint64(au.PromptTokensDetails.CachedTokens)
+ }
+ out := uint64(au.CompletionTokens)
+ u := llm.Usage{
+ InputTokens: in,
+ CacheReadInputTokens: inc,
+ CacheCreationInputTokens: in - inc, // uncached prompt tokens; cached tokens are a subset of PromptTokens
+ OutputTokens: out,
+ }
+ u.CostUSD = s.calculateCostFromTokens(u)
+ return u
+}
+
+// toLLMResponse converts the OpenAI response to llm.Response.
+func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
+ // fmt.Printf("Raw response\n")
+ // enc := json.NewEncoder(os.Stdout)
+ // enc.SetIndent("", " ")
+ // enc.Encode(r)
+ // fmt.Printf("\n")
+
+ if len(r.Choices) == 0 {
+ return &llm.Response{
+ ID: r.ID,
+ Model: r.Model,
+ Role: llm.MessageRoleAssistant,
+ Usage: s.toLLMUsage(r.Model, r.Usage),
+ }
+ }
+
+ // Process the primary choice
+ choice := r.Choices[0]
+
+ return &llm.Response{
+ ID: r.ID,
+ Model: r.Model,
+ Role: toRoleFromString(choice.Message.Role),
+ Content: toLLMContents(choice.Message),
+ StopReason: toStopReason(string(choice.FinishReason)),
+ Usage: s.toLLMUsage(r.Model, r.Usage),
+ }
+}
+
+// toRoleFromString converts a role string to llm.MessageRole.
+func toRoleFromString(role string) llm.MessageRole {
+ if role == "tool" || role == "system" || role == "function" {
+ return llm.MessageRoleAssistant // Map special roles to assistant for consistency
+ }
+ if mr, ok := toLLMRole[role]; ok {
+ return mr
+ }
+ return llm.MessageRoleUser // Default to user if unknown
+}
+
+// toStopReason converts a finish reason string to llm.StopReason.
+func toStopReason(reason string) llm.StopReason {
+ if sr, ok := toLLMStopReason[reason]; ok {
+ return sr
+ }
+ return llm.StopReasonStopSequence // Default
+}
+
+// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
+func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
+ cost := s.Model.Cost
+
+ // TODO: check this for correctness, i am skeptical
+ // Calculate cost in cents
+ megaCents := u.CacheCreationInputTokens*cost.Input +
+ u.CacheReadInputTokens*cost.CachedInput +
+ u.OutputTokens*cost.Output
+
+ cents := float64(megaCents) / 1_000_000
+ // Convert to dollars
+ dollars := cents / 100.0
+ // fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
+ return dollars
+}
+
+// Do sends a request to OpenAI using the go-openai package.
+func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
+ // Configure the OpenAI client
+ httpc := cmp.Or(s.HTTPC, http.DefaultClient)
+ model := cmp.Or(s.Model, DefaultModel)
+
+ // TODO: do this once during Service setup? maybe with a constructor instead?
+ config := openai.DefaultConfig(s.APIKey)
+ if model.URL != "" {
+ config.BaseURL = model.URL
+ }
+ if s.Org != "" {
+ config.OrgID = s.Org
+ }
+ config.HTTPClient = httpc
+
+ client := openai.NewClientWithConfig(config)
+
+ // Start with system messages if provided
+ var allMessages []openai.ChatCompletionMessage
+ if len(ir.System) > 0 {
+ sysMessages := fromLLMSystem(ir.System)
+ allMessages = append(allMessages, sysMessages...)
+ }
+
+ // Add regular and tool messages
+ for _, msg := range ir.Messages {
+ msgs := fromLLMMessage(msg)
+ allMessages = append(allMessages, msgs...)
+ }
+
+ // Convert tools
+ var tools []openai.Tool
+ for _, t := range ir.Tools {
+ tools = append(tools, fromLLMTool(t))
+ }
+
+ // Create the OpenAI request
+ req := openai.ChatCompletionRequest{
+ Model: model.ModelName,
+ Messages: allMessages,
+ MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
+ Tools: tools,
+ ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
+ }
+ // fmt.Printf("Sending request to OpenAI\n")
+ // enc := json.NewEncoder(os.Stdout)
+ // enc.SetIndent("", " ")
+ // enc.Encode(req)
+ // fmt.Printf("\n")
+
+ // Retry mechanism
+ backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
+
+ // retry loop
+ for attempts := 0; ; attempts++ {
+ resp, err := client.CreateChatCompletion(ctx, req)
+
+ // Handle successful response
+ if err == nil {
+ return s.toLLMResponse(&resp), nil
+ }
+
+ // Handle errors
+ var apiErr *openai.APIError
+ if ok := errors.As(err, &apiErr); !ok {
+ // Not an OpenAI API error, return immediately
+ return nil, err
+ }
+
+ switch {
+ case apiErr.HTTPStatusCode >= 500:
+ // Server error, try again with backoff
+ sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
+ slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
+ time.Sleep(sleep)
+ continue
+
+ case apiErr.HTTPStatusCode == 429:
+ // Rate limited, back off longer
+ sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
+ slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
+ time.Sleep(sleep)
+ continue
+
+ default:
+ // Other error, return immediately
+ return nil, fmt.Errorf("OpenAI API error: %w", err)
+ }
+ }
+}
diff --git a/llm/oai/oai_test.go b/llm/oai/oai_test.go
new file mode 100644
index 0000000..7bea552
--- /dev/null
+++ b/llm/oai/oai_test.go
@@ -0,0 +1,96 @@
+package oai
+
+import (
+ "math"
+ "testing"
+
+ "sketch.dev/llm"
+)
+
+// TestCalculateCostFromTokens tests the calculateCostFromTokens method
+func TestCalculateCostFromTokens(t *testing.T) {
+ tests := []struct {
+ name string
+ model Model
+ cacheCreationTokens uint64
+ cacheReadTokens uint64
+ outputTokens uint64
+ want float64
+ }{
+ {
+ name: "Zero tokens",
+ model: GPT41,
+ cacheCreationTokens: 0,
+ cacheReadTokens: 0,
+ outputTokens: 0,
+ want: 0,
+ },
+ {
+ name: "1000 input tokens, 500 output tokens",
+ model: GPT41,
+ cacheCreationTokens: 1000,
+ cacheReadTokens: 0,
+ outputTokens: 500,
+ // GPT41: Input: 200 per million, Output: 800 per million
+ // (1000 * 200 + 500 * 800) / 1_000_000 / 100 = 0.006
+ want: 0.006,
+ },
+ {
+ name: "10000 input tokens, 5000 output tokens",
+ model: GPT41,
+ cacheCreationTokens: 10000,
+ cacheReadTokens: 0,
+ outputTokens: 5000,
+ // (10000 * 200 + 5000 * 800) / 1_000_000 / 100 = 0.06
+ want: 0.06,
+ },
+ {
+ name: "1000 input tokens, 500 output tokens Gemini",
+ model: Gemini25Flash,
+ cacheCreationTokens: 1000,
+ cacheReadTokens: 0,
+ outputTokens: 500,
+ // Gemini25Flash: Input: 15 per million, Output: 60 per million
+ // (1000 * 15 + 500 * 60) / 1_000_000 / 100 = 0.00045
+ want: 0.00045,
+ },
+ {
+ name: "With cache read tokens",
+ model: GPT41,
+ cacheCreationTokens: 500,
+ cacheReadTokens: 500, // 500 tokens from cache
+ outputTokens: 500,
+ // (500 * 200 + 500 * 50 + 500 * 800) / 1_000_000 / 100 = 0.00525
+ want: 0.00525,
+ },
+ {
+ name: "With all token types",
+ model: GPT41,
+ cacheCreationTokens: 1000,
+ cacheReadTokens: 1000,
+ outputTokens: 1000,
+ // (1000 * 200 + 1000 * 50 + 1000 * 800) / 1_000_000 / 100 = 0.0105
+ want: 0.0105,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create a service with the test model
+ svc := &Service{Model: tt.model}
+
+ // Create a usage object
+ usage := llm.Usage{
+ CacheCreationInputTokens: tt.cacheCreationTokens,
+ CacheReadInputTokens: tt.cacheReadTokens,
+ OutputTokens: tt.outputTokens,
+ }
+
+ totalCost := svc.calculateCostFromTokens(usage)
+ if math.Abs(totalCost-tt.want) > 0.0001 {
+ t.Errorf("calculateCostFromTokens(%s, cache_creation=%d, cache_read=%d, output=%d) = %v, want %v",
+ tt.model.ModelName, tt.cacheCreationTokens, tt.cacheReadTokens, tt.outputTokens, totalCost, tt.want)
+ }
+ })
+ }
+}