blob: fdf2fde7089c1110a33148efcf883175207093fa [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package ant
2
3import (
4 "bytes"
5 "cmp"
6 "context"
7 "encoding/json"
8 "fmt"
9 "io"
10 "log/slog"
11 "math/rand/v2"
12 "net/http"
13 "strings"
14 "testing"
15 "time"
16
17 "sketch.dev/llm"
18)
19
// Defaults used by Service when the corresponding field is left unset.
const (
	DefaultModel = Claude37Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	// DefaultURL is the Anthropic Messages API endpoint.
	DefaultURL = "https://api.anthropic.com/v1/messages"
)
27
// Claude model identifiers, as accepted by the Anthropic API's "model" field.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
)
33
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	URL       string       // defaults to DefaultURL if empty
	APIKey    string       // must be non-empty
	Model     string       // defaults to DefaultModel if empty
	MaxTokens int          // defaults to DefaultMaxTokens if zero
}
43
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)
45
// content is a single content block within a message. One struct covers
// every block type used by the Messages API (text, image, thinking,
// redacted_thinking, tool_use, tool_result); which fields are meaningful
// depends on Type.
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//  "role": "user",
	//  "content": [
	//   {
	//    "type": "tool_result",
	//    "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//    "content": [
	//     {"type": "text", "text": "15 degrees"},
	//     {
	//      "type": "image",
	//      "source": {
	//       "type": "base64",
	//       "media_type": "image/jpeg",
	//       "data": "/9j/4AAQSkZJRg...",
	//      }
	//     }
	//    ]
	//   }
	//  ]
	//}
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl, when non-nil, marks this block as a prompt-cache
	// breakpoint (see fromLLMCache).
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
105
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant" (see fromLLMRole)
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}
112
// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
118
// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema for the tool's input
}
128
// usage represents the billing and rate-limit usage, mirroring the
// "usage" object in Anthropic's API responses. CostUSD is computed
// locally (see TotalDollars), not returned by the API.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates other's token counters and cost into u.
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
145
// errorResponse is the error payload returned by the API on failure:
// a machine-readable type plus a human-readable message.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}
150
// response represents the response from the message API.
type response struct {
	ID      string    `json:"id"`
	Type    string    `json:"type"`
	Role    string    `json:"role"`
	Model   string    `json:"model"`
	Content []content `json:"content"`
	// StopReason values handled here: "stop_sequence", "max_tokens",
	// "end_turn", "tool_use" (see toLLMStopReason).
	StopReason   string  `json:"stop_reason"`
	StopSequence *string `json:"stop_sequence,omitempty"`
	Usage        usage   `json:"usage"`
}
162
// toolChoice controls whether/which tool Claude may use. Type takes the
// values in fromLLMToolChoiceType ("auto", "any", "none", "tool").
type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"` // name of the requested tool, when applicable
}
167
// systemContent is one block of the system prompt.
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // see fromLLMCache
}
174
// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	// TokenEfficientToolUse opts in to the token-efficient tool use beta
	// header (never serialized into the JSON body).
	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
191
// dumpText is a debugging toggle to see raw communications with Claude.
const dumpText = false
193
// mapped returns a new slice containing f applied to each element of s,
// preserving order. The result is never nil, even for empty input.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	result := make([]T, 0, len(s))
	for _, elem := range s {
		result = append(result, f(elem))
	}
	return result
}
201
// inverted returns the inverse of m, mapping each value back to its key.
// It panics if two keys share a value, since the inverse would then be
// ambiguous; callers use it only on bijective translation tables.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	out := make(map[V]K, len(m))
	for key, val := range m {
		if _, dup := out[val]; dup {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		out[val] = key
	}
	return out
}
212
// Translation tables between llm package enums and Anthropic wire
// strings. The to* tables are mechanically inverted from the from*
// tables where the mapping is bijective.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
	}
)
243
// fromLLMCache returns the cache_control JSON marking a block as an
// ephemeral prompt-cache breakpoint, or nil when caching is not requested.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
250
251func fromLLMContent(c llm.Content) content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700252 var toolResult []content
253 if len(c.ToolResult) > 0 {
254 toolResult = make([]content, len(c.ToolResult))
255 for i, tr := range c.ToolResult {
256 // For image content inside a tool_result, we need to map it to "image" type
257 if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
258 // Format as an image for Claude
259 toolResult[i] = content{
260 Type: "image",
261 Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
262 tr.MediaType, tr.Data)),
263 }
264 } else {
265 toolResult[i] = fromLLMContent(tr)
266 }
267 }
268 }
269
270 d := content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700271 ID: c.ID,
272 Type: fromLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700273 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700274 Thinking: c.Thinking,
275 Data: c.Data,
276 Signature: c.Signature,
277 ToolName: c.ToolName,
278 ToolInput: c.ToolInput,
279 ToolUseID: c.ToolUseID,
280 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700281 ToolResult: toolResult,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700282 CacheControl: fromLLMCache(c.Cache),
283 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700284 // Anthropic API complains if Text is specified when it shouldn't be
285 // or not specified when it's the empty string.
286 if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse {
287 d.Text = &c.Text
288 }
289 return d
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700290}
291
292func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
293 if tu == nil {
294 return nil
295 }
296 return &toolUse{
297 ID: tu.ID,
298 Name: tu.Name,
299 }
300}
301
302func fromLLMMessage(msg llm.Message) message {
303 return message{
304 Role: fromLLMRole[msg.Role],
305 Content: mapped(msg.Content, fromLLMContent),
306 ToolUse: fromLLMToolUse(msg.ToolUse),
307 }
308}
309
310func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
311 if tc == nil {
312 return nil
313 }
314 return &toolChoice{
315 Type: fromLLMToolChoiceType[tc.Type],
316 Name: tc.Name,
317 }
318}
319
320func fromLLMTool(t *llm.Tool) *tool {
321 return &tool{
322 Name: t.Name,
323 Type: t.Type,
324 Description: t.Description,
325 InputSchema: t.InputSchema,
326 }
327}
328
329func fromLLMSystem(s llm.SystemContent) systemContent {
330 return systemContent{
331 Text: s.Text,
332 Type: s.Type,
333 CacheControl: fromLLMCache(s.Cache),
334 }
335}
336
337func (s *Service) fromLLMRequest(r *llm.Request) *request {
338 return &request{
339 Model: cmp.Or(s.Model, DefaultModel),
340 Messages: mapped(r.Messages, fromLLMMessage),
341 MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
342 ToolChoice: fromLLMToolChoice(r.ToolChoice),
343 Tools: mapped(r.Tools, fromLLMTool),
344 System: mapped(r.System, fromLLMSystem),
345 }
346}
347
348func toLLMUsage(u usage) llm.Usage {
349 return llm.Usage{
350 InputTokens: u.InputTokens,
351 CacheCreationInputTokens: u.CacheCreationInputTokens,
352 CacheReadInputTokens: u.CacheReadInputTokens,
353 OutputTokens: u.OutputTokens,
354 CostUSD: u.CostUSD,
355 }
356}
357
358func toLLMContent(c content) llm.Content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700359 // Convert toolResult from []content to []llm.Content
360 var toolResultContents []llm.Content
361 if len(c.ToolResult) > 0 {
362 toolResultContents = make([]llm.Content, len(c.ToolResult))
363 for i, tr := range c.ToolResult {
364 toolResultContents[i] = toLLMContent(tr)
365 }
366 }
367
368 ret := llm.Content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700369 ID: c.ID,
370 Type: toLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700371 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700372 Thinking: c.Thinking,
373 Data: c.Data,
374 Signature: c.Signature,
375 ToolName: c.ToolName,
376 ToolInput: c.ToolInput,
377 ToolUseID: c.ToolUseID,
378 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700379 ToolResult: toolResultContents,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700380 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700381 if c.Text != nil {
382 ret.Text = *c.Text
383 }
384 return ret
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700385}
386
387func toLLMResponse(r *response) *llm.Response {
388 return &llm.Response{
389 ID: r.ID,
390 Type: r.Type,
391 Role: toLLMRole[r.Role],
392 Model: r.Model,
393 Content: mapped(r.Content, toLLMContent),
394 StopReason: toLLMStopReason[r.StopReason],
395 StopSequence: r.StopSequence,
396 Usage: toLLMUsage(r.Usage),
397 }
398}
399
400// Do sends a request to Anthropic.
401func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
402 request := s.fromLLMRequest(ir)
403
404 var payload []byte
405 var err error
406 if dumpText || testing.Testing() {
407 payload, err = json.MarshalIndent(request, "", " ")
408 } else {
409 payload, err = json.Marshal(request)
410 payload = append(payload, '\n')
411 }
412 if err != nil {
413 return nil, err
414 }
415
416 if false {
417 fmt.Printf("claude request payload:\n%s\n", payload)
418 }
419
420 backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
421 largerMaxTokens := false
422 var partialUsage usage
423
424 url := cmp.Or(s.URL, DefaultURL)
425 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
426
427 // retry loop
428 for attempts := 0; ; attempts++ {
429 if dumpText {
430 fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
431 }
432 req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
433 if err != nil {
434 return nil, err
435 }
436
437 req.Header.Set("Content-Type", "application/json")
438 req.Header.Set("X-API-Key", s.APIKey)
439 req.Header.Set("Anthropic-Version", "2023-06-01")
440
441 var features []string
442 if request.TokenEfficientToolUse {
443 features = append(features, "token-efficient-tool-use-2025-02-19")
444 }
445 if largerMaxTokens {
446 features = append(features, "output-128k-2025-02-19")
447 request.MaxTokens = 128 * 1024
448 }
449 if len(features) > 0 {
450 req.Header.Set("anthropic-beta", strings.Join(features, ","))
451 }
452
453 resp, err := httpc.Do(req)
454 if err != nil {
455 return nil, err
456 }
457 buf, _ := io.ReadAll(resp.Body)
458 resp.Body.Close()
459
460 switch {
461 case resp.StatusCode == http.StatusOK:
462 if dumpText {
463 fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
464 }
465 var response response
466 err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
467 if err != nil {
468 return nil, err
469 }
470 if response.StopReason == "max_tokens" && !largerMaxTokens {
Josh Bleecher Snyder29fea842025-05-06 01:51:09 +0000471 slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700472 // Retry with more output tokens.
473 largerMaxTokens = true
474 response.Usage.CostUSD = response.TotalDollars()
475 partialUsage = response.Usage
476 continue
477 }
478
479 // Calculate and set the cost_usd field
480 if largerMaxTokens {
481 response.Usage.Add(partialUsage)
482 }
483 response.Usage.CostUSD = response.TotalDollars()
484
485 return toLLMResponse(&response), nil
486 case resp.StatusCode >= 500 && resp.StatusCode < 600:
487 // overloaded or unhappy, in one form or another
488 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
489 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "sleep", sleep)
490 time.Sleep(sleep)
491 case resp.StatusCode == 429:
492 // rate limited. wait 1 minute as a starting point, because that's the rate limiting window.
493 // and then add some additional time for backoff.
494 sleep := time.Minute + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
495 slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "sleep", sleep)
496 time.Sleep(sleep)
497 // case resp.StatusCode == 400:
498 // TODO: parse ErrorResponse, make (*ErrorResponse) implement error
499 default:
500 return nil, fmt.Errorf("API request failed with status %s\n%s", resp.Status, buf)
501 }
502 }
503}
504
// centsPer1MTokens holds one model's pricing, in cents per million tokens
// (not dollars because i'm twitchy about using floats for money).
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64
	CacheCreation uint64
}
513
// modelCost maps model identifiers to their API pricing.
// https://www.anthropic.com/pricing#anthropic-api
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
}
535
536// TotalDollars returns the total cost to obtain this response, in dollars.
537func (mr *response) TotalDollars() float64 {
538 cpm, ok := modelCost[mr.Model]
539 if !ok {
540 panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
541 }
542 use := mr.Usage
543 megaCents := use.InputTokens*cpm.Input +
544 use.OutputTokens*cpm.Output +
545 use.CacheReadInputTokens*cpm.CacheRead +
546 use.CacheCreationInputTokens*cpm.CacheCreation
547 cents := float64(megaCents) / 1_000_000.0
548 return cents / 100.0
549}