| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 1 | package ant |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "cmp" |
| 6 | "context" |
| 7 | "encoding/json" |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 8 | "errors" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 9 | "fmt" |
| 10 | "io" |
| 11 | "log/slog" |
| 12 | "math/rand/v2" |
| 13 | "net/http" |
| 14 | "strings" |
| 15 | "testing" |
| 16 | "time" |
| 17 | |
| 18 | "sketch.dev/llm" |
| 19 | ) |
| 20 | |
const (
	// DefaultModel is the model used when Service.Model is empty.
	DefaultModel = Claude4Sonnet
	// DefaultMaxTokens is the output-token cap used when Service.MaxTokens is zero.
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	// DefaultURL is the Anthropic messages endpoint used when Service.URL is empty.
	DefaultURL = "https://api.anthropic.com/v1/messages"
)
| 28 | |
// Anthropic model identifiers, as accepted by the messages API.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	Claude4Sonnet  = "claude-sonnet-4-20250514"
	Claude4Opus    = "claude-opus-4-20250514"
)
| 36 | |
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	URL       string       // defaults to DefaultURL if empty
	APIKey    string       // must be non-empty
	Model     string       // defaults to DefaultModel if empty
	MaxTokens int          // defaults to DefaultMaxTokens if zero
}

// Compile-time check that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)
| 48 | |
// content is one item in a message's content list, covering every content
// type this client exchanges with the Anthropic messages API (text, image,
// thinking, redacted_thinking, tool_use, tool_result). Which fields are
// populated depends on Type.
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//   "role": "user",
	//   "content": [
	//     {
	//       "type": "tool_result",
	//       "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//       "content": [
	//         {"type": "text", "text": "15 degrees"},
	//         {
	//           "type": "image",
	//           "source": {
	//             "type": "base64",
	//             "media_type": "image/jpeg",
	//             "data": "/9j/4AAQSkZJRg...",
	//           }
	//         }
	//       ]
	//     }
	//   ]
	//}
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl marks a prompt-cache breakpoint; see fromLLMCache.
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 108 | |
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant"
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}
| 115 | |
// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
| 121 | |
// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema for the tool's input
}
| 131 | |
// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64 `json:"input_tokens"`
	CacheCreationInputTokens uint64 `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64 `json:"cache_read_input_tokens"`
	OutputTokens             uint64 `json:"output_tokens"`
	// CostUSD is computed locally (see TotalDollars), not returned by the API.
	CostUSD float64 `json:"cost_usd"`
}
| 140 | |
// Add accumulates other's token counts and cost into u.
// It is used to merge usage across retried requests.
func (u *usage) Add(other usage) {
	u.InputTokens += other.InputTokens
	u.CacheCreationInputTokens += other.CacheCreationInputTokens
	u.CacheReadInputTokens += other.CacheReadInputTokens
	u.OutputTokens += other.OutputTokens
	u.CostUSD += other.CostUSD
}
| 148 | |
// errorResponse is the error payload returned by the Anthropic API.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}
| 153 | |
// response represents the response from the message API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"` // e.g. "end_turn", "max_tokens", "tool_use"
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}
| 165 | |
// toolChoice controls whether/which tool Claude may use; see
// https://docs.anthropic.com/en/api/messages#body-tool-choice
type toolChoice struct {
	Type string `json:"type"`           // "auto", "any", "none", or "tool"
	Name string `json:"name,omitempty"` // required when Type is "tool"
}
| 170 | |
// systemContent is one entry of the system prompt.
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // see fromLLMCache
}
| 177 | |
// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	// TokenEfficientToolUse requests the token-efficient-tool-use beta
	// header (see Do); it is never serialized into the request body.
	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
| 194 | |
const dumpText = false // debugging toggle to see raw communications with Claude

// mapped applies f to every element of s and returns the results in order.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	result := make([]T, 0, len(s))
	for _, elem := range s {
		result = append(result, f(elem))
	}
	return result
}
| 204 | |
// inverted returns the value->key inverse of m.
// It panics if two keys map to the same value, since that would make the
// inverse ambiguous.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	out := make(map[V]K, len(m))
	for key, val := range m {
		if _, dup := out[val]; dup {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		out[val] = key
	}
	return out
}
| 215 | |
// Translation tables between llm-package enums and the Anthropic API's wire
// strings. The to* maps are built by inverting the from* maps, so each pair
// stays consistent by construction (inverted panics on duplicate values).
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	// Tool choice types flow only from us to the API, so no inverse is needed.
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	// Stop reasons flow only from the API to us, so no inverse is needed.
	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
		"refusal":       llm.StopReasonRefusal,
	}
)
| 247 | |
// fromLLMCache converts the llm cache flag into the Anthropic
// cache_control payload: an ephemeral cache breakpoint when set,
// nil (omitted from JSON) otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
| 254 | |
| 255 | func fromLLMContent(c llm.Content) content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 256 | var toolResult []content |
| 257 | if len(c.ToolResult) > 0 { |
| 258 | toolResult = make([]content, len(c.ToolResult)) |
| 259 | for i, tr := range c.ToolResult { |
| 260 | // For image content inside a tool_result, we need to map it to "image" type |
| 261 | if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" { |
| 262 | // Format as an image for Claude |
| 263 | toolResult[i] = content{ |
| 264 | Type: "image", |
| 265 | Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`, |
| 266 | tr.MediaType, tr.Data)), |
| 267 | } |
| 268 | } else { |
| 269 | toolResult[i] = fromLLMContent(tr) |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | d := content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 275 | ID: c.ID, |
| 276 | Type: fromLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 277 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 278 | Thinking: c.Thinking, |
| 279 | Data: c.Data, |
| 280 | Signature: c.Signature, |
| 281 | ToolName: c.ToolName, |
| 282 | ToolInput: c.ToolInput, |
| 283 | ToolUseID: c.ToolUseID, |
| 284 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 285 | ToolResult: toolResult, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 286 | CacheControl: fromLLMCache(c.Cache), |
| 287 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 288 | // Anthropic API complains if Text is specified when it shouldn't be |
| 289 | // or not specified when it's the empty string. |
| 290 | if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse { |
| 291 | d.Text = &c.Text |
| 292 | } |
| 293 | return d |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 294 | } |
| 295 | |
| 296 | func fromLLMToolUse(tu *llm.ToolUse) *toolUse { |
| 297 | if tu == nil { |
| 298 | return nil |
| 299 | } |
| 300 | return &toolUse{ |
| 301 | ID: tu.ID, |
| 302 | Name: tu.Name, |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | func fromLLMMessage(msg llm.Message) message { |
| 307 | return message{ |
| 308 | Role: fromLLMRole[msg.Role], |
| 309 | Content: mapped(msg.Content, fromLLMContent), |
| 310 | ToolUse: fromLLMToolUse(msg.ToolUse), |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice { |
| 315 | if tc == nil { |
| 316 | return nil |
| 317 | } |
| 318 | return &toolChoice{ |
| 319 | Type: fromLLMToolChoiceType[tc.Type], |
| 320 | Name: tc.Name, |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | func fromLLMTool(t *llm.Tool) *tool { |
| 325 | return &tool{ |
| 326 | Name: t.Name, |
| 327 | Type: t.Type, |
| 328 | Description: t.Description, |
| 329 | InputSchema: t.InputSchema, |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | func fromLLMSystem(s llm.SystemContent) systemContent { |
| 334 | return systemContent{ |
| 335 | Text: s.Text, |
| 336 | Type: s.Type, |
| 337 | CacheControl: fromLLMCache(s.Cache), |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | func (s *Service) fromLLMRequest(r *llm.Request) *request { |
| 342 | return &request{ |
| 343 | Model: cmp.Or(s.Model, DefaultModel), |
| 344 | Messages: mapped(r.Messages, fromLLMMessage), |
| 345 | MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), |
| 346 | ToolChoice: fromLLMToolChoice(r.ToolChoice), |
| 347 | Tools: mapped(r.Tools, fromLLMTool), |
| 348 | System: mapped(r.System, fromLLMSystem), |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | func toLLMUsage(u usage) llm.Usage { |
| 353 | return llm.Usage{ |
| 354 | InputTokens: u.InputTokens, |
| 355 | CacheCreationInputTokens: u.CacheCreationInputTokens, |
| 356 | CacheReadInputTokens: u.CacheReadInputTokens, |
| 357 | OutputTokens: u.OutputTokens, |
| 358 | CostUSD: u.CostUSD, |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | func toLLMContent(c content) llm.Content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 363 | // Convert toolResult from []content to []llm.Content |
| 364 | var toolResultContents []llm.Content |
| 365 | if len(c.ToolResult) > 0 { |
| 366 | toolResultContents = make([]llm.Content, len(c.ToolResult)) |
| 367 | for i, tr := range c.ToolResult { |
| 368 | toolResultContents[i] = toLLMContent(tr) |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | ret := llm.Content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 373 | ID: c.ID, |
| 374 | Type: toLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 375 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 376 | Thinking: c.Thinking, |
| 377 | Data: c.Data, |
| 378 | Signature: c.Signature, |
| 379 | ToolName: c.ToolName, |
| 380 | ToolInput: c.ToolInput, |
| 381 | ToolUseID: c.ToolUseID, |
| 382 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 383 | ToolResult: toolResultContents, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 384 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 385 | if c.Text != nil { |
| 386 | ret.Text = *c.Text |
| 387 | } |
| 388 | return ret |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 389 | } |
| 390 | |
| 391 | func toLLMResponse(r *response) *llm.Response { |
| 392 | return &llm.Response{ |
| 393 | ID: r.ID, |
| 394 | Type: r.Type, |
| 395 | Role: toLLMRole[r.Role], |
| 396 | Model: r.Model, |
| 397 | Content: mapped(r.Content, toLLMContent), |
| 398 | StopReason: toLLMStopReason[r.StopReason], |
| 399 | StopSequence: r.StopSequence, |
| 400 | Usage: toLLMUsage(r.Usage), |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | // Do sends a request to Anthropic. |
| 405 | func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) { |
| 406 | request := s.fromLLMRequest(ir) |
| 407 | |
| 408 | var payload []byte |
| 409 | var err error |
| 410 | if dumpText || testing.Testing() { |
| 411 | payload, err = json.MarshalIndent(request, "", " ") |
| 412 | } else { |
| 413 | payload, err = json.Marshal(request) |
| 414 | payload = append(payload, '\n') |
| 415 | } |
| 416 | if err != nil { |
| 417 | return nil, err |
| 418 | } |
| 419 | |
| 420 | if false { |
| 421 | fmt.Printf("claude request payload:\n%s\n", payload) |
| 422 | } |
| 423 | |
| 424 | backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute} |
| 425 | largerMaxTokens := false |
| 426 | var partialUsage usage |
| 427 | |
| 428 | url := cmp.Or(s.URL, DefaultURL) |
| 429 | httpc := cmp.Or(s.HTTPC, http.DefaultClient) |
| 430 | |
| 431 | // retry loop |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 432 | var errs error // accumulated errors across all attempts |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 433 | for attempts := 0; ; attempts++ { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 434 | if attempts > 10 { |
| 435 | return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs) |
| 436 | } |
| 437 | if attempts > 0 { |
| 438 | sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second))) |
| 439 | slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts) |
| 440 | time.Sleep(sleep) |
| 441 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 442 | if dumpText { |
| 443 | fmt.Printf("RAW REQUEST:\n%s\n\n", payload) |
| 444 | } |
| 445 | req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload)) |
| 446 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 447 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 448 | } |
| 449 | |
| 450 | req.Header.Set("Content-Type", "application/json") |
| 451 | req.Header.Set("X-API-Key", s.APIKey) |
| 452 | req.Header.Set("Anthropic-Version", "2023-06-01") |
| 453 | |
| 454 | var features []string |
| 455 | if request.TokenEfficientToolUse { |
| 456 | features = append(features, "token-efficient-tool-use-2025-02-19") |
| 457 | } |
| 458 | if largerMaxTokens { |
| 459 | features = append(features, "output-128k-2025-02-19") |
| 460 | request.MaxTokens = 128 * 1024 |
| 461 | } |
| 462 | if len(features) > 0 { |
| 463 | req.Header.Set("anthropic-beta", strings.Join(features, ",")) |
| 464 | } |
| 465 | |
| 466 | resp, err := httpc.Do(req) |
| 467 | if err != nil { |
| Josh Bleecher Snyder | 3b5646f | 2025-05-23 16:47:53 +0000 | [diff] [blame] | 468 | // Don't retry httprr cache misses |
| 469 | if strings.Contains(err.Error(), "cached HTTP response not found") { |
| 470 | return nil, err |
| 471 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 472 | errs = errors.Join(errs, err) |
| 473 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 474 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 475 | buf, err := io.ReadAll(resp.Body) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 476 | resp.Body.Close() |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 477 | if err != nil { |
| 478 | errs = errors.Join(errs, err) |
| 479 | continue |
| 480 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 481 | |
| 482 | switch { |
| 483 | case resp.StatusCode == http.StatusOK: |
| 484 | if dumpText { |
| 485 | fmt.Printf("RAW RESPONSE:\n%s\n\n", buf) |
| 486 | } |
| 487 | var response response |
| 488 | err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response) |
| 489 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 490 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 491 | } |
| 492 | if response.StopReason == "max_tokens" && !largerMaxTokens { |
| Josh Bleecher Snyder | 29fea84 | 2025-05-06 01:51:09 +0000 | [diff] [blame] | 493 | slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size") |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 494 | // Retry with more output tokens. |
| 495 | largerMaxTokens = true |
| 496 | response.Usage.CostUSD = response.TotalDollars() |
| 497 | partialUsage = response.Usage |
| 498 | continue |
| 499 | } |
| 500 | |
| 501 | // Calculate and set the cost_usd field |
| 502 | if largerMaxTokens { |
| 503 | response.Usage.Add(partialUsage) |
| 504 | } |
| 505 | response.Usage.CostUSD = response.TotalDollars() |
| 506 | |
| 507 | return toLLMResponse(&response), nil |
| 508 | case resp.StatusCode >= 500 && resp.StatusCode < 600: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 509 | // server error, retry |
| 510 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 511 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 512 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 513 | case resp.StatusCode == 429: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 514 | // rate limited, retry |
| 515 | slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf)) |
| 516 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 517 | continue |
| 518 | case resp.StatusCode >= 400 && resp.StatusCode < 500: |
| 519 | // some other 400, probably unrecoverable |
| 520 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 521 | return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 522 | default: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 523 | // ...retry, I guess? |
| 524 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 525 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 526 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 527 | } |
| 528 | } |
| 529 | } |
| 530 | |
// centsPer1MTokens holds a model's pricing as cents per million tokens,
// broken down by token category.
// (not dollars because i'm twitchy about using floats for money)
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64
	CacheCreation uint64
}
| 539 | |
// modelCost maps each supported model to its pricing.
// TotalDollars panics on models missing from this table, so keep it in
// sync with the model constants above.
// https://www.anthropic.com/pricing#anthropic-api
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Opus: {
		Input:         1500, // $15
		Output:        7500, // $75
		CacheRead:     150,  // $1.50
		CacheCreation: 1875, // $18.75
	},
}
| 573 | |
| 574 | // TotalDollars returns the total cost to obtain this response, in dollars. |
| 575 | func (mr *response) TotalDollars() float64 { |
| 576 | cpm, ok := modelCost[mr.Model] |
| 577 | if !ok { |
| 578 | panic(fmt.Sprintf("no pricing info for model: %s", mr.Model)) |
| 579 | } |
| 580 | use := mr.Usage |
| 581 | megaCents := use.InputTokens*cpm.Input + |
| 582 | use.OutputTokens*cpm.Output + |
| 583 | use.CacheReadInputTokens*cpm.CacheRead + |
| 584 | use.CacheCreationInputTokens*cpm.CacheCreation |
| 585 | cents := float64(megaCents) / 1_000_000.0 |
| 586 | return cents / 100.0 |
| 587 | } |