blob: d2d3f3e9213eaa00db7aa43c469b61d2f0ecbc41 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package ant
2
3import (
4 "bytes"
5 "cmp"
6 "context"
7 "encoding/json"
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -07008 "errors"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07009 "fmt"
10 "io"
11 "log/slog"
12 "math/rand/v2"
13 "net/http"
14 "strings"
15 "testing"
16 "time"
17
18 "sketch.dev/llm"
19)
20
// Defaults applied by Service when the corresponding field is zero.
const (
	DefaultModel = Claude37Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	DefaultURL       = "https://api.anthropic.com/v1/messages"
)
28
// Anthropic model identifiers, as sent in the request's "model" field.
// Every model listed here must also have an entry in modelCost.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
)
34
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	URL       string       // defaults to DefaultURL if empty
	APIKey    string       // must be non-empty
	Model     string       // defaults to DefaultModel if empty
	MaxTokens int          // defaults to DefaultMaxTokens if zero
}

// Compile-time check that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)
46
// content is a single element of a message's content array, mirroring
// https://docs.anthropic.com/en/api/messages
// One struct covers every variant (text, image, thinking, redacted_thinking,
// tool_use, tool_result); Type selects which fields are meaningful.
type content struct {
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//   "role": "user",
	//   "content": [
	//     {
	//       "type": "tool_result",
	//       "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//       "content": [
	//         {"type": "text", "text": "15 degrees"},
	//         {
	//           "type": "image",
	//           "source": {
	//             "type": "base64",
	//             "media_type": "image/jpeg",
	//             "data": "/9j/4AAQSkZJRg...",
	//           }
	//         }
	//       ]
	//     }
	//   ]
	//}
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl, when non-nil, marks this content as a prompt-cache
	// breakpoint (see fromLLMCache).
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
106
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant" (see fromLLMRole)
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}
113
// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"` // name of the tool being invoked
}
119
// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema for the tool's input
}
129
// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64 `json:"input_tokens"`
	CacheCreationInputTokens uint64 `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64 `json:"cache_read_input_tokens"`
	OutputTokens             uint64 `json:"output_tokens"`
	// CostUSD is computed locally (see Do and TotalDollars), not decoded
	// from the API response.
	CostUSD float64 `json:"cost_usd"`
}
138
139func (u *usage) Add(other usage) {
140 u.InputTokens += other.InputTokens
141 u.CacheCreationInputTokens += other.CacheCreationInputTokens
142 u.CacheReadInputTokens += other.CacheReadInputTokens
143 u.OutputTokens += other.OutputTokens
144 u.CostUSD += other.CostUSD
145}
146
// errorResponse models an error object returned by the Anthropic API.
// NOTE(review): not referenced in this file; presumably decoded by
// code elsewhere in the package — confirm before removing.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}
151
// response represents the response from the message API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"` // e.g. "end_turn", "max_tokens" (see toLLMStopReason)
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}
163
// toolChoice controls whether, and which, tool Claude may use.
// Type is one of "auto", "any", "none", or "tool" (see fromLLMToolChoiceType);
// Name is set only when Type is "tool".
type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}
168
// systemContent is one entry of the top-level system prompt.
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // prompt-cache breakpoint (see fromLLMCache)
}
175
// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	// TokenEfficientToolUse opts in to the token-efficient-tool-use beta
	// header in Do when true.
	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
192
// dumpText is a debugging toggle: when true, raw request/response bodies
// exchanged with Claude are printed to stdout (see Do).
const dumpText = false
194
// mapped applies f to each element of s and returns the results as a
// new slice of the same length.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	result := make([]T, len(s))
	for i := range s {
		result[i] = f(s[i])
	}
	return result
}
202
// inverted returns the value-to-key inverse of m.
// It panics if two keys map to the same value, since the inverse would
// then be ambiguous.
func inverted[K, V cmp.Ordered](m map[V]K) map[K]V {
	out := make(map[K]V, len(m))
	for key, val := range m {
		if _, dup := out[val]; dup {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		out[val] = key
	}
	return out
}
213
// Translation tables between the llm package's enums and the Anthropic
// wire strings. The to* tables are derived by inverting the from* tables,
// so each pair stays in sync by construction.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	// toLLMStopReason is written out directly (not inverted) because it
	// only needs the wire-to-enum direction.
	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
	}
)
244
// fromLLMCache translates the llm cache flag into the Anthropic
// cache_control object: an ephemeral breakpoint when set, nil otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
251
252func fromLLMContent(c llm.Content) content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700253 var toolResult []content
254 if len(c.ToolResult) > 0 {
255 toolResult = make([]content, len(c.ToolResult))
256 for i, tr := range c.ToolResult {
257 // For image content inside a tool_result, we need to map it to "image" type
258 if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
259 // Format as an image for Claude
260 toolResult[i] = content{
261 Type: "image",
262 Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
263 tr.MediaType, tr.Data)),
264 }
265 } else {
266 toolResult[i] = fromLLMContent(tr)
267 }
268 }
269 }
270
271 d := content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700272 ID: c.ID,
273 Type: fromLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700274 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700275 Thinking: c.Thinking,
276 Data: c.Data,
277 Signature: c.Signature,
278 ToolName: c.ToolName,
279 ToolInput: c.ToolInput,
280 ToolUseID: c.ToolUseID,
281 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700282 ToolResult: toolResult,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700283 CacheControl: fromLLMCache(c.Cache),
284 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700285 // Anthropic API complains if Text is specified when it shouldn't be
286 // or not specified when it's the empty string.
287 if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse {
288 d.Text = &c.Text
289 }
290 return d
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700291}
292
293func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
294 if tu == nil {
295 return nil
296 }
297 return &toolUse{
298 ID: tu.ID,
299 Name: tu.Name,
300 }
301}
302
303func fromLLMMessage(msg llm.Message) message {
304 return message{
305 Role: fromLLMRole[msg.Role],
306 Content: mapped(msg.Content, fromLLMContent),
307 ToolUse: fromLLMToolUse(msg.ToolUse),
308 }
309}
310
311func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
312 if tc == nil {
313 return nil
314 }
315 return &toolChoice{
316 Type: fromLLMToolChoiceType[tc.Type],
317 Name: tc.Name,
318 }
319}
320
321func fromLLMTool(t *llm.Tool) *tool {
322 return &tool{
323 Name: t.Name,
324 Type: t.Type,
325 Description: t.Description,
326 InputSchema: t.InputSchema,
327 }
328}
329
330func fromLLMSystem(s llm.SystemContent) systemContent {
331 return systemContent{
332 Text: s.Text,
333 Type: s.Type,
334 CacheControl: fromLLMCache(s.Cache),
335 }
336}
337
338func (s *Service) fromLLMRequest(r *llm.Request) *request {
339 return &request{
340 Model: cmp.Or(s.Model, DefaultModel),
341 Messages: mapped(r.Messages, fromLLMMessage),
342 MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
343 ToolChoice: fromLLMToolChoice(r.ToolChoice),
344 Tools: mapped(r.Tools, fromLLMTool),
345 System: mapped(r.System, fromLLMSystem),
346 }
347}
348
349func toLLMUsage(u usage) llm.Usage {
350 return llm.Usage{
351 InputTokens: u.InputTokens,
352 CacheCreationInputTokens: u.CacheCreationInputTokens,
353 CacheReadInputTokens: u.CacheReadInputTokens,
354 OutputTokens: u.OutputTokens,
355 CostUSD: u.CostUSD,
356 }
357}
358
359func toLLMContent(c content) llm.Content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700360 // Convert toolResult from []content to []llm.Content
361 var toolResultContents []llm.Content
362 if len(c.ToolResult) > 0 {
363 toolResultContents = make([]llm.Content, len(c.ToolResult))
364 for i, tr := range c.ToolResult {
365 toolResultContents[i] = toLLMContent(tr)
366 }
367 }
368
369 ret := llm.Content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700370 ID: c.ID,
371 Type: toLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700372 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700373 Thinking: c.Thinking,
374 Data: c.Data,
375 Signature: c.Signature,
376 ToolName: c.ToolName,
377 ToolInput: c.ToolInput,
378 ToolUseID: c.ToolUseID,
379 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700380 ToolResult: toolResultContents,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700381 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700382 if c.Text != nil {
383 ret.Text = *c.Text
384 }
385 return ret
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700386}
387
388func toLLMResponse(r *response) *llm.Response {
389 return &llm.Response{
390 ID: r.ID,
391 Type: r.Type,
392 Role: toLLMRole[r.Role],
393 Model: r.Model,
394 Content: mapped(r.Content, toLLMContent),
395 StopReason: toLLMStopReason[r.StopReason],
396 StopSequence: r.StopSequence,
397 Usage: toLLMUsage(r.Usage),
398 }
399}
400
401// Do sends a request to Anthropic.
402func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
403 request := s.fromLLMRequest(ir)
404
405 var payload []byte
406 var err error
407 if dumpText || testing.Testing() {
408 payload, err = json.MarshalIndent(request, "", " ")
409 } else {
410 payload, err = json.Marshal(request)
411 payload = append(payload, '\n')
412 }
413 if err != nil {
414 return nil, err
415 }
416
417 if false {
418 fmt.Printf("claude request payload:\n%s\n", payload)
419 }
420
421 backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
422 largerMaxTokens := false
423 var partialUsage usage
424
425 url := cmp.Or(s.URL, DefaultURL)
426 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
427
428 // retry loop
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700429 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700430 for attempts := 0; ; attempts++ {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700431 if attempts > 10 {
432 return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
433 }
434 if attempts > 0 {
435 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
436 slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
437 time.Sleep(sleep)
438 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700439 if dumpText {
440 fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
441 }
442 req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
443 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700444 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700445 }
446
447 req.Header.Set("Content-Type", "application/json")
448 req.Header.Set("X-API-Key", s.APIKey)
449 req.Header.Set("Anthropic-Version", "2023-06-01")
450
451 var features []string
452 if request.TokenEfficientToolUse {
453 features = append(features, "token-efficient-tool-use-2025-02-19")
454 }
455 if largerMaxTokens {
456 features = append(features, "output-128k-2025-02-19")
457 request.MaxTokens = 128 * 1024
458 }
459 if len(features) > 0 {
460 req.Header.Set("anthropic-beta", strings.Join(features, ","))
461 }
462
463 resp, err := httpc.Do(req)
464 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700465 errs = errors.Join(errs, err)
466 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700467 }
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700468 buf, err := io.ReadAll(resp.Body)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700469 resp.Body.Close()
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700470 if err != nil {
471 errs = errors.Join(errs, err)
472 continue
473 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700474
475 switch {
476 case resp.StatusCode == http.StatusOK:
477 if dumpText {
478 fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
479 }
480 var response response
481 err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
482 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700483 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700484 }
485 if response.StopReason == "max_tokens" && !largerMaxTokens {
Josh Bleecher Snyder29fea842025-05-06 01:51:09 +0000486 slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700487 // Retry with more output tokens.
488 largerMaxTokens = true
489 response.Usage.CostUSD = response.TotalDollars()
490 partialUsage = response.Usage
491 continue
492 }
493
494 // Calculate and set the cost_usd field
495 if largerMaxTokens {
496 response.Usage.Add(partialUsage)
497 }
498 response.Usage.CostUSD = response.TotalDollars()
499
500 return toLLMResponse(&response), nil
501 case resp.StatusCode >= 500 && resp.StatusCode < 600:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700502 // server error, retry
503 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
504 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
505 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700506 case resp.StatusCode == 429:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700507 // rate limited, retry
508 slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf))
509 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
510 continue
511 case resp.StatusCode >= 400 && resp.StatusCode < 500:
512 // some other 400, probably unrecoverable
513 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
514 return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700515 default:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700516 // ...retry, I guess?
517 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
518 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
519 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700520 }
521 }
522}
523
// centsPer1MTokens holds pricing as integer cents per million tokens
// (not dollars because i'm twitchy about using floats for money).
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64 // prompt-cache read tokens
	CacheCreation uint64 // prompt-cache write tokens
}
532
// modelCost gives per-model pricing.
// https://www.anthropic.com/pricing#anthropic-api
// TotalDollars panics for models missing from this table, so every model
// constant declared above must have an entry here.
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
}
554
555// TotalDollars returns the total cost to obtain this response, in dollars.
556func (mr *response) TotalDollars() float64 {
557 cpm, ok := modelCost[mr.Model]
558 if !ok {
559 panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
560 }
561 use := mr.Usage
562 megaCents := use.InputTokens*cpm.Input +
563 use.OutputTokens*cpm.Output +
564 use.CacheReadInputTokens*cpm.CacheRead +
565 use.CacheCreationInputTokens*cpm.CacheCreation
566 cents := float64(megaCents) / 1_000_000.0
567 return cents / 100.0
568}