blob: 92b55f90b5b189fa68b49a7421f961a8c9c9ed89 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package ant
2
3import (
4 "bytes"
5 "cmp"
6 "context"
7 "encoding/json"
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -07008 "errors"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07009 "fmt"
10 "io"
11 "log/slog"
12 "math/rand/v2"
13 "net/http"
14 "strings"
15 "testing"
16 "time"
17
18 "sketch.dev/llm"
19)
20
const (
	// DefaultModel is the model used when Service.Model is empty.
	DefaultModel = Claude4Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	// DefaultURL is the Anthropic messages endpoint, used when Service.URL is empty.
	DefaultURL = "https://api.anthropic.com/v1/messages"
)

// Model identifier strings accepted by the Anthropic messages API.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	Claude4Sonnet  = "claude-sonnet-4-20250514"
	Claude4Opus    = "claude-opus-4-20250514"
)
36
Philip Zeyligerb8a8f352025-06-02 07:39:37 -070037// TokenContextWindow returns the maximum token context window size for this service
38func (s *Service) TokenContextWindow() int {
39 model := s.Model
40 if model == "" {
41 model = DefaultModel
42 }
43
44 switch model {
45 case Claude35Sonnet, Claude37Sonnet:
46 return 200000
47 case Claude35Haiku:
48 return 200000
49 case Claude4Sonnet, Claude4Opus:
50 return 200000
51 default:
52 // Default for unknown models
53 return 200000
54 }
55}
56
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070057// Service provides Claude completions.
58// Fields should not be altered concurrently with calling any method on Service.
59type Service struct {
60 HTTPC *http.Client // defaults to http.DefaultClient if nil
61 URL string // defaults to DefaultURL if empty
62 APIKey string // must be non-empty
63 Model string // defaults to DefaultModel if empty
64 MaxTokens int // defaults to DefaultMaxTokens if zero
65}
66
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)
68
// content is a single content block within a message, mirroring the
// Anthropic messages API's content union. One struct covers all the block
// types this file uses ("text", "image", "thinking", "redacted_thinking",
// "tool_use", "tool_result"); which fields are meaningful depends on Type.
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//   "role": "user",
	//   "content": [
	//     {
	//       "type": "tool_result",
	//       "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//       "content": [
	//         {"type": "text", "text": "15 degrees"},
	//         {
	//           "type": "image",
	//           "source": {
	//             "type": "base64",
	//             "media_type": "image/jpeg",
	//             "data": "/9j/4AAQSkZJRg...",
	//           }
	//         }
	//       ]
	//     }
	//   ]
	// }
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl, when non-nil, marks this block as a prompt-cache
	// breakpoint (see fromLLMCache).
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
128
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant"; see fromLLMRole
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}

// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}

// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema describing the tool's input
}
151
// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates other's token counts and cost into u.
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
168
// errorResponse is the shape of an Anthropic API error body.
// NOTE(review): not referenced anywhere in this file; presumably kept for
// decoding error responses — confirm before removing.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}

// response represents the response from the message API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"` // e.g. "end_turn", "max_tokens"; see toLLMStopReason
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}

// toolChoice controls whether and which tool Claude may use.
type toolChoice struct {
	Type string `json:"type"` // see fromLLMToolChoiceType: "auto", "any", "none", or "tool"
	Name string `json:"name,omitempty"`
}

// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // see fromLLMCache
}

// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}

const dumpText = false // debugging toggle to see raw communications with Claude
216
// mapped applies f to every element of s and returns the results as a new
// slice in the same order.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	out := make([]T, 0, len(s))
	for _, elem := range s {
		out = append(out, f(elem))
	}
	return out
}
224
// inverted returns a new map with the keys and values of m swapped.
// It panics if m contains duplicate values, since the inversion would
// otherwise silently drop entries.
//
// The constraints are comparable rather than cmp.Ordered: only map-key
// comparability is required, and the looser constraint admits any
// comparable key/value types.
func inverted[K, V comparable](m map[K]V) map[V]K {
	inv := make(map[V]K, len(m)) // pre-size: result has exactly len(m) entries
	for k, v := range m {
		if _, ok := inv[v]; ok {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", v))
		}
		inv[v] = k
	}
	return inv
}
235
// Translation tables between the llm package's enums and the Anthropic
// API's wire strings. The to* variants are built by inverting the from*
// tables, which panics at package init if a mapping is not one-to-one.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	// toLLMStopReason maps the API's stop_reason strings to llm.StopReason values.
	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
		"refusal":       llm.StopReasonRefusal,
	}
)
267
// fromLLMCache converts the llm cache flag into the Anthropic cache_control
// JSON payload; a false flag yields nil (no cache_control field at all).
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
274
275func fromLLMContent(c llm.Content) content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700276 var toolResult []content
277 if len(c.ToolResult) > 0 {
278 toolResult = make([]content, len(c.ToolResult))
279 for i, tr := range c.ToolResult {
280 // For image content inside a tool_result, we need to map it to "image" type
281 if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
282 // Format as an image for Claude
283 toolResult[i] = content{
284 Type: "image",
285 Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
286 tr.MediaType, tr.Data)),
287 }
288 } else {
289 toolResult[i] = fromLLMContent(tr)
290 }
291 }
292 }
293
294 d := content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700295 ID: c.ID,
296 Type: fromLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700297 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700298 Thinking: c.Thinking,
299 Data: c.Data,
300 Signature: c.Signature,
301 ToolName: c.ToolName,
302 ToolInput: c.ToolInput,
303 ToolUseID: c.ToolUseID,
304 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700305 ToolResult: toolResult,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700306 CacheControl: fromLLMCache(c.Cache),
307 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700308 // Anthropic API complains if Text is specified when it shouldn't be
309 // or not specified when it's the empty string.
310 if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse {
311 d.Text = &c.Text
312 }
313 return d
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700314}
315
316func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
317 if tu == nil {
318 return nil
319 }
320 return &toolUse{
321 ID: tu.ID,
322 Name: tu.Name,
323 }
324}
325
326func fromLLMMessage(msg llm.Message) message {
327 return message{
328 Role: fromLLMRole[msg.Role],
329 Content: mapped(msg.Content, fromLLMContent),
330 ToolUse: fromLLMToolUse(msg.ToolUse),
331 }
332}
333
334func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
335 if tc == nil {
336 return nil
337 }
338 return &toolChoice{
339 Type: fromLLMToolChoiceType[tc.Type],
340 Name: tc.Name,
341 }
342}
343
344func fromLLMTool(t *llm.Tool) *tool {
345 return &tool{
346 Name: t.Name,
347 Type: t.Type,
348 Description: t.Description,
349 InputSchema: t.InputSchema,
350 }
351}
352
353func fromLLMSystem(s llm.SystemContent) systemContent {
354 return systemContent{
355 Text: s.Text,
356 Type: s.Type,
357 CacheControl: fromLLMCache(s.Cache),
358 }
359}
360
361func (s *Service) fromLLMRequest(r *llm.Request) *request {
362 return &request{
363 Model: cmp.Or(s.Model, DefaultModel),
364 Messages: mapped(r.Messages, fromLLMMessage),
365 MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
366 ToolChoice: fromLLMToolChoice(r.ToolChoice),
367 Tools: mapped(r.Tools, fromLLMTool),
368 System: mapped(r.System, fromLLMSystem),
369 }
370}
371
372func toLLMUsage(u usage) llm.Usage {
373 return llm.Usage{
374 InputTokens: u.InputTokens,
375 CacheCreationInputTokens: u.CacheCreationInputTokens,
376 CacheReadInputTokens: u.CacheReadInputTokens,
377 OutputTokens: u.OutputTokens,
378 CostUSD: u.CostUSD,
379 }
380}
381
382func toLLMContent(c content) llm.Content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700383 // Convert toolResult from []content to []llm.Content
384 var toolResultContents []llm.Content
385 if len(c.ToolResult) > 0 {
386 toolResultContents = make([]llm.Content, len(c.ToolResult))
387 for i, tr := range c.ToolResult {
388 toolResultContents[i] = toLLMContent(tr)
389 }
390 }
391
392 ret := llm.Content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700393 ID: c.ID,
394 Type: toLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700395 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700396 Thinking: c.Thinking,
397 Data: c.Data,
398 Signature: c.Signature,
399 ToolName: c.ToolName,
400 ToolInput: c.ToolInput,
401 ToolUseID: c.ToolUseID,
402 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700403 ToolResult: toolResultContents,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700404 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700405 if c.Text != nil {
406 ret.Text = *c.Text
407 }
408 return ret
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700409}
410
411func toLLMResponse(r *response) *llm.Response {
412 return &llm.Response{
413 ID: r.ID,
414 Type: r.Type,
415 Role: toLLMRole[r.Role],
416 Model: r.Model,
417 Content: mapped(r.Content, toLLMContent),
418 StopReason: toLLMStopReason[r.StopReason],
419 StopSequence: r.StopSequence,
420 Usage: toLLMUsage(r.Usage),
421 }
422}
423
424// Do sends a request to Anthropic.
425func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
426 request := s.fromLLMRequest(ir)
427
428 var payload []byte
429 var err error
430 if dumpText || testing.Testing() {
431 payload, err = json.MarshalIndent(request, "", " ")
432 } else {
433 payload, err = json.Marshal(request)
434 payload = append(payload, '\n')
435 }
436 if err != nil {
437 return nil, err
438 }
439
440 if false {
441 fmt.Printf("claude request payload:\n%s\n", payload)
442 }
443
444 backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
445 largerMaxTokens := false
446 var partialUsage usage
447
448 url := cmp.Or(s.URL, DefaultURL)
449 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
450
451 // retry loop
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700452 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700453 for attempts := 0; ; attempts++ {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700454 if attempts > 10 {
455 return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
456 }
457 if attempts > 0 {
458 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
459 slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
460 time.Sleep(sleep)
461 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700462 if dumpText {
463 fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
464 }
465 req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
466 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700467 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700468 }
469
470 req.Header.Set("Content-Type", "application/json")
471 req.Header.Set("X-API-Key", s.APIKey)
472 req.Header.Set("Anthropic-Version", "2023-06-01")
473
474 var features []string
475 if request.TokenEfficientToolUse {
476 features = append(features, "token-efficient-tool-use-2025-02-19")
477 }
478 if largerMaxTokens {
479 features = append(features, "output-128k-2025-02-19")
480 request.MaxTokens = 128 * 1024
481 }
482 if len(features) > 0 {
483 req.Header.Set("anthropic-beta", strings.Join(features, ","))
484 }
485
486 resp, err := httpc.Do(req)
487 if err != nil {
Josh Bleecher Snyder3b5646f2025-05-23 16:47:53 +0000488 // Don't retry httprr cache misses
489 if strings.Contains(err.Error(), "cached HTTP response not found") {
490 return nil, err
491 }
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700492 errs = errors.Join(errs, err)
493 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700494 }
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700495 buf, err := io.ReadAll(resp.Body)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700496 resp.Body.Close()
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700497 if err != nil {
498 errs = errors.Join(errs, err)
499 continue
500 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700501
502 switch {
503 case resp.StatusCode == http.StatusOK:
504 if dumpText {
505 fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
506 }
507 var response response
508 err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
509 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700510 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700511 }
512 if response.StopReason == "max_tokens" && !largerMaxTokens {
Josh Bleecher Snyder29fea842025-05-06 01:51:09 +0000513 slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700514 // Retry with more output tokens.
515 largerMaxTokens = true
516 response.Usage.CostUSD = response.TotalDollars()
517 partialUsage = response.Usage
518 continue
519 }
520
521 // Calculate and set the cost_usd field
522 if largerMaxTokens {
523 response.Usage.Add(partialUsage)
524 }
525 response.Usage.CostUSD = response.TotalDollars()
526
527 return toLLMResponse(&response), nil
528 case resp.StatusCode >= 500 && resp.StatusCode < 600:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700529 // server error, retry
530 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
531 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
532 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700533 case resp.StatusCode == 429:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700534 // rate limited, retry
535 slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf))
536 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
537 continue
538 case resp.StatusCode >= 400 && resp.StatusCode < 500:
539 // some other 400, probably unrecoverable
540 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
541 return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700542 default:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700543 // ...retry, I guess?
544 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
545 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
546 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700547 }
548 }
549}
550
// cents per million tokens
// (not dollars because i'm twitchy about using floats for money)
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64
	CacheCreation uint64
}

// modelCost holds per-model pricing in cents per million tokens.
// Models missing from this table cause TotalDollars to panic.
// https://www.anthropic.com/pricing#anthropic-api
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Opus: {
		Input:         1500, // $15
		Output:        7500, // $75
		CacheRead:     150,  // $1.50
		CacheCreation: 1875, // $18.75
	},
}
593
594// TotalDollars returns the total cost to obtain this response, in dollars.
595func (mr *response) TotalDollars() float64 {
596 cpm, ok := modelCost[mr.Model]
597 if !ok {
598 panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
599 }
600 use := mr.Usage
601 megaCents := use.InputTokens*cpm.Input +
602 use.OutputTokens*cpm.Output +
603 use.CacheReadInputTokens*cpm.CacheRead +
604 use.CacheCreationInputTokens*cpm.CacheCreation
605 cents := float64(megaCents) / 1_000_000.0
606 return cents / 100.0
607}