blob: 0072b69e74f79187ba55131062f83162ece7cca9 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package ant
2
3import (
4 "bytes"
5 "cmp"
6 "context"
7 "encoding/json"
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -07008 "errors"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07009 "fmt"
10 "io"
11 "log/slog"
12 "math/rand/v2"
13 "net/http"
14 "strings"
15 "testing"
16 "time"
17
18 "sketch.dev/llm"
19)
20
21const (
Josh Bleecher Snyder0efb29d2025-05-22 21:05:04 -070022 DefaultModel = Claude4Sonnet
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070023 // See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
24 // current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
25 DefaultMaxTokens = 8192
26 DefaultURL = "https://api.anthropic.com/v1/messages"
27)
28
// Model identifiers accepted in Service.Model.
const (
	// Claude 3.x models.
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"

	// Claude 4 models.
	Claude4Sonnet = "claude-sonnet-4-20250514"
	Claude4Opus   = "claude-opus-4-20250514"
)
36
Philip Zeyligerb8a8f352025-06-02 07:39:37 -070037// TokenContextWindow returns the maximum token context window size for this service
38func (s *Service) TokenContextWindow() int {
39 model := s.Model
40 if model == "" {
41 model = DefaultModel
42 }
43
44 switch model {
45 case Claude35Sonnet, Claude37Sonnet:
46 return 200000
47 case Claude35Haiku:
48 return 200000
49 case Claude4Sonnet, Claude4Opus:
50 return 200000
51 default:
52 // Default for unknown models
53 return 200000
54 }
55}
56
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	// HTTPC is the HTTP client used for requests; defaults to http.DefaultClient if nil.
	HTTPC *http.Client
	// URL is the messages endpoint; defaults to DefaultURL if empty.
	URL string
	// APIKey is sent with every request; must be non-empty.
	APIKey string
	// Model is the model identifier; defaults to DefaultModel if empty.
	Model string
	// MaxTokens caps the output tokens; defaults to DefaultMaxTokens if zero.
	MaxTokens int
}
66
67var _ llm.Service = (*Service)(nil)
68
// content is one block of a message as exchanged with the Messages API
// (https://docs.anthropic.com/en/api/messages). A single struct covers every
// block type; which fields are populated depends on Type.
type content struct {
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Text is a pointer so that "empty string" and "absent" can be told apart:
	// empty strings appear often in tool results and must still be sent, while
	// other block types reject a text field entirely.
	// The underlying API errors look like one of:
	//   "messages.46.content.0.tool_result.content.0.text.text: Field required"
	//   "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// There is no great reference for the API shape;
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text *string `json:"text,omitempty"`

	// Image fields.
	MediaType string          `json:"media_type,omitempty"`
	Source    json.RawMessage `json:"source,omitempty"`

	// Thinking holds the text of a thinking block.
	Thinking string `json:"thinking,omitempty"`
	// Data is used by redacted_thinking and image blocks.
	Data string `json:"data,omitempty"`
	// Signature accompanies a thinking block.
	Signature string `json:"signature,omitempty"`

	// tool_use fields.
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// tool_result fields.
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// ToolResult nests further content blocks inside a tool_result, so the
	// type is recursive. A message then looks like:
	//
	//	{
	//	  "role": "user",
	//	  "content": [
	//	    {
	//	      "type": "tool_result",
	//	      "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//	      "content": [
	//	        {"type": "text", "text": "15 degrees"},
	//	        {
	//	          "type": "image",
	//	          "source": {
	//	            "type": "base64",
	//	            "media_type": "image/jpeg",
	//	            "data": "/9j/4AAQSkZJRg...",
	//	          }
	//	        }
	//	      ]
	//	    }
	//	  ]
	//	}
	ToolResult []content `json:"content,omitempty"`

	// Timing information for tool_result; not sent to Claude.
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
128
129// message represents a message in the conversation.
130type message struct {
131 Role string `json:"role"`
132 Content []content `json:"content"`
133 ToolUse *toolUse `json:"tool_use,omitempty"` // use to control whether/which tool to use
134}
135
// toolUse identifies a tool invocation attached to a message, by ID and tool name.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
141
// tool describes one tool offered to Claude in a request.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"`
}
151
// usage carries the token counts and cost reported for a request; it is the
// basis for billing and rate-limit accounting.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates every counter of other, including the cost, into u.
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
168
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700169// response represents the response from the message API.
170type response struct {
171 ID string `json:"id"`
172 Type string `json:"type"`
173 Role string `json:"role"`
174 Model string `json:"model"`
175 Content []content `json:"content"`
176 StopReason string `json:"stop_reason"`
177 StopSequence *string `json:"stop_sequence,omitempty"`
178 Usage usage `json:"usage"`
179}
180
// toolChoice is the request's tool_choice value: a choice type and, when
// set, the name of a specific tool.
type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}
185
// systemContent is one entry of the request's system prompt; see
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
192
193// request represents the request payload for creating a message.
194type request struct {
195 Model string `json:"model"`
196 Messages []message `json:"messages"`
197 ToolChoice *toolChoice `json:"tool_choice,omitempty"`
198 MaxTokens int `json:"max_tokens"`
199 Tools []*tool `json:"tools,omitempty"`
200 Stream bool `json:"stream,omitempty"`
201 System []systemContent `json:"system,omitempty"`
202 Temperature float64 `json:"temperature,omitempty"`
203 TopK int `json:"top_k,omitempty"`
204 TopP float64 `json:"top_p,omitempty"`
205 StopSequences []string `json:"stop_sequences,omitempty"`
206
207 TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
208}
209
// dumpText is a debugging toggle: when true, raw requests and responses
// exchanged with Claude are printed.
const dumpText = false
211
// mapped applies f to each element of s, in order, and returns the results.
// The returned slice is always non-nil and has the same length as s.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	results := make([]T, 0, len(s))
	for _, elem := range s {
		results = append(results, f(elem))
	}
	return results
}
219
// inverted returns a new map with the keys and values of m swapped.
//
// It panics if two keys of m share the same value, since the inverse would
// then be ambiguous. Only equality of keys and values is required, so the
// type parameters are constrained to comparable rather than an ordered type.
func inverted[K, V comparable](m map[K]V) map[V]K {
	inv := make(map[V]K, len(m))
	for k, v := range m {
		if _, ok := inv[v]; ok {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", v))
		}
		inv[v] = k
	}
	return inv
}
230
231var (
232 fromLLMRole = map[llm.MessageRole]string{
233 llm.MessageRoleAssistant: "assistant",
234 llm.MessageRoleUser: "user",
235 }
236 toLLMRole = inverted(fromLLMRole)
237
238 fromLLMContentType = map[llm.ContentType]string{
239 llm.ContentTypeText: "text",
240 llm.ContentTypeThinking: "thinking",
241 llm.ContentTypeRedactedThinking: "redacted_thinking",
242 llm.ContentTypeToolUse: "tool_use",
243 llm.ContentTypeToolResult: "tool_result",
244 }
245 toLLMContentType = inverted(fromLLMContentType)
246
247 fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
248 llm.ToolChoiceTypeAuto: "auto",
249 llm.ToolChoiceTypeAny: "any",
250 llm.ToolChoiceTypeNone: "none",
251 llm.ToolChoiceTypeTool: "tool",
252 }
253
254 toLLMStopReason = map[string]llm.StopReason{
255 "stop_sequence": llm.StopReasonStopSequence,
256 "max_tokens": llm.StopReasonMaxTokens,
257 "end_turn": llm.StopReasonEndTurn,
258 "tool_use": llm.StopReasonToolUse,
Josh Bleecher Snyder0e8073a2025-05-22 21:04:51 -0700259 "refusal": llm.StopReasonRefusal,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700260 }
261)
262
// fromLLMCache returns the cache_control value for a block: an ephemeral
// marker when c is true, nil (so the field is omitted) otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
269
270func fromLLMContent(c llm.Content) content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700271 var toolResult []content
272 if len(c.ToolResult) > 0 {
273 toolResult = make([]content, len(c.ToolResult))
274 for i, tr := range c.ToolResult {
275 // For image content inside a tool_result, we need to map it to "image" type
276 if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
277 // Format as an image for Claude
278 toolResult[i] = content{
279 Type: "image",
280 Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
281 tr.MediaType, tr.Data)),
282 }
283 } else {
284 toolResult[i] = fromLLMContent(tr)
285 }
286 }
287 }
288
289 d := content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700290 ID: c.ID,
291 Type: fromLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700292 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700293 Thinking: c.Thinking,
294 Data: c.Data,
295 Signature: c.Signature,
296 ToolName: c.ToolName,
297 ToolInput: c.ToolInput,
298 ToolUseID: c.ToolUseID,
299 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700300 ToolResult: toolResult,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700301 CacheControl: fromLLMCache(c.Cache),
302 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700303 // Anthropic API complains if Text is specified when it shouldn't be
304 // or not specified when it's the empty string.
305 if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse {
306 d.Text = &c.Text
307 }
308 return d
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700309}
310
311func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
312 if tu == nil {
313 return nil
314 }
315 return &toolUse{
316 ID: tu.ID,
317 Name: tu.Name,
318 }
319}
320
321func fromLLMMessage(msg llm.Message) message {
322 return message{
323 Role: fromLLMRole[msg.Role],
324 Content: mapped(msg.Content, fromLLMContent),
325 ToolUse: fromLLMToolUse(msg.ToolUse),
326 }
327}
328
329func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
330 if tc == nil {
331 return nil
332 }
333 return &toolChoice{
334 Type: fromLLMToolChoiceType[tc.Type],
335 Name: tc.Name,
336 }
337}
338
339func fromLLMTool(t *llm.Tool) *tool {
340 return &tool{
341 Name: t.Name,
342 Type: t.Type,
343 Description: t.Description,
344 InputSchema: t.InputSchema,
345 }
346}
347
348func fromLLMSystem(s llm.SystemContent) systemContent {
349 return systemContent{
350 Text: s.Text,
351 Type: s.Type,
352 CacheControl: fromLLMCache(s.Cache),
353 }
354}
355
356func (s *Service) fromLLMRequest(r *llm.Request) *request {
357 return &request{
358 Model: cmp.Or(s.Model, DefaultModel),
359 Messages: mapped(r.Messages, fromLLMMessage),
360 MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
361 ToolChoice: fromLLMToolChoice(r.ToolChoice),
362 Tools: mapped(r.Tools, fromLLMTool),
363 System: mapped(r.System, fromLLMSystem),
364 }
365}
366
367func toLLMUsage(u usage) llm.Usage {
368 return llm.Usage{
369 InputTokens: u.InputTokens,
370 CacheCreationInputTokens: u.CacheCreationInputTokens,
371 CacheReadInputTokens: u.CacheReadInputTokens,
372 OutputTokens: u.OutputTokens,
373 CostUSD: u.CostUSD,
374 }
375}
376
377func toLLMContent(c content) llm.Content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700378 // Convert toolResult from []content to []llm.Content
379 var toolResultContents []llm.Content
380 if len(c.ToolResult) > 0 {
381 toolResultContents = make([]llm.Content, len(c.ToolResult))
382 for i, tr := range c.ToolResult {
383 toolResultContents[i] = toLLMContent(tr)
384 }
385 }
386
387 ret := llm.Content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700388 ID: c.ID,
389 Type: toLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700390 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700391 Thinking: c.Thinking,
392 Data: c.Data,
393 Signature: c.Signature,
394 ToolName: c.ToolName,
395 ToolInput: c.ToolInput,
396 ToolUseID: c.ToolUseID,
397 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700398 ToolResult: toolResultContents,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700399 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700400 if c.Text != nil {
401 ret.Text = *c.Text
402 }
403 return ret
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700404}
405
406func toLLMResponse(r *response) *llm.Response {
407 return &llm.Response{
408 ID: r.ID,
409 Type: r.Type,
410 Role: toLLMRole[r.Role],
411 Model: r.Model,
412 Content: mapped(r.Content, toLLMContent),
413 StopReason: toLLMStopReason[r.StopReason],
414 StopSequence: r.StopSequence,
415 Usage: toLLMUsage(r.Usage),
416 }
417}
418
419// Do sends a request to Anthropic.
420func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
421 request := s.fromLLMRequest(ir)
422
423 var payload []byte
424 var err error
425 if dumpText || testing.Testing() {
426 payload, err = json.MarshalIndent(request, "", " ")
427 } else {
428 payload, err = json.Marshal(request)
429 payload = append(payload, '\n')
430 }
431 if err != nil {
432 return nil, err
433 }
434
435 if false {
436 fmt.Printf("claude request payload:\n%s\n", payload)
437 }
438
439 backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
440 largerMaxTokens := false
441 var partialUsage usage
442
443 url := cmp.Or(s.URL, DefaultURL)
444 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
445
446 // retry loop
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700447 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700448 for attempts := 0; ; attempts++ {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700449 if attempts > 10 {
450 return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
451 }
452 if attempts > 0 {
453 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
454 slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
455 time.Sleep(sleep)
456 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700457 if dumpText {
458 fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
459 }
460 req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
461 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700462 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700463 }
464
465 req.Header.Set("Content-Type", "application/json")
466 req.Header.Set("X-API-Key", s.APIKey)
467 req.Header.Set("Anthropic-Version", "2023-06-01")
468
469 var features []string
470 if request.TokenEfficientToolUse {
471 features = append(features, "token-efficient-tool-use-2025-02-19")
472 }
473 if largerMaxTokens {
474 features = append(features, "output-128k-2025-02-19")
475 request.MaxTokens = 128 * 1024
476 }
477 if len(features) > 0 {
478 req.Header.Set("anthropic-beta", strings.Join(features, ","))
479 }
480
481 resp, err := httpc.Do(req)
482 if err != nil {
Josh Bleecher Snyder3b5646f2025-05-23 16:47:53 +0000483 // Don't retry httprr cache misses
484 if strings.Contains(err.Error(), "cached HTTP response not found") {
485 return nil, err
486 }
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700487 errs = errors.Join(errs, err)
488 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700489 }
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700490 buf, err := io.ReadAll(resp.Body)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700491 resp.Body.Close()
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700492 if err != nil {
493 errs = errors.Join(errs, err)
494 continue
495 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700496
497 switch {
498 case resp.StatusCode == http.StatusOK:
499 if dumpText {
500 fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
501 }
502 var response response
503 err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
504 if err != nil {
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700505 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700506 }
507 if response.StopReason == "max_tokens" && !largerMaxTokens {
Josh Bleecher Snyder29fea842025-05-06 01:51:09 +0000508 slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700509 // Retry with more output tokens.
510 largerMaxTokens = true
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700511 response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700512 partialUsage = response.Usage
513 continue
514 }
515
516 // Calculate and set the cost_usd field
517 if largerMaxTokens {
518 response.Usage.Add(partialUsage)
519 }
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700520 response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700521
522 return toLLMResponse(&response), nil
523 case resp.StatusCode >= 500 && resp.StatusCode < 600:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700524 // server error, retry
525 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
526 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
527 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700528 case resp.StatusCode == 429:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700529 // rate limited, retry
530 slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf))
531 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
532 continue
533 case resp.StatusCode >= 400 && resp.StatusCode < 500:
534 // some other 400, probably unrecoverable
535 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
536 return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700537 default:
Josh Bleecher Snydera4500c92025-05-15 15:38:32 -0700538 // ...retry, I guess?
539 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode)
540 errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf))
541 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700542 }
543 }
544}