| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 1 | package ant |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "cmp" |
| 6 | "context" |
| 7 | "encoding/json" |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 8 | "errors" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 9 | "fmt" |
| 10 | "io" |
| 11 | "log/slog" |
| 12 | "math/rand/v2" |
| 13 | "net/http" |
| 14 | "strings" |
| 15 | "testing" |
| 16 | "time" |
| 17 | |
| 18 | "sketch.dev/llm" |
| 19 | ) |
| 20 | |
| 21 | const ( |
| Josh Bleecher Snyder | 0efb29d | 2025-05-22 21:05:04 -0700 | [diff] [blame] | 22 | DefaultModel = Claude4Sonnet |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 23 | // See https://docs.anthropic.com/en/docs/about-claude/models/all-models for |
| 24 | // current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19) |
| 25 | DefaultMaxTokens = 8192 |
| 26 | DefaultURL = "https://api.anthropic.com/v1/messages" |
| 27 | ) |
| 28 | |
// Model identifiers that can be assigned to Service.Model.
const (
	// Claude 3.5 family.
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"

	// Claude 3.7 family.
	Claude37Sonnet = "claude-3-7-sonnet-20250219"

	// Claude 4 family.
	Claude4Sonnet = "claude-sonnet-4-20250514"
	Claude4Opus   = "claude-opus-4-20250514"
)
| 36 | |
| Philip Zeyliger | b8a8f35 | 2025-06-02 07:39:37 -0700 | [diff] [blame] | 37 | // TokenContextWindow returns the maximum token context window size for this service |
| 38 | func (s *Service) TokenContextWindow() int { |
| 39 | model := s.Model |
| 40 | if model == "" { |
| 41 | model = DefaultModel |
| 42 | } |
| 43 | |
| 44 | switch model { |
| 45 | case Claude35Sonnet, Claude37Sonnet: |
| 46 | return 200000 |
| 47 | case Claude35Haiku: |
| 48 | return 200000 |
| 49 | case Claude4Sonnet, Claude4Opus: |
| 50 | return 200000 |
| 51 | default: |
| 52 | // Default for unknown models |
| 53 | return 200000 |
| 54 | } |
| 55 | } |
| 56 | |
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	// HTTPC is the HTTP client used for requests; defaults to http.DefaultClient if nil.
	HTTPC *http.Client

	// URL is the messages endpoint; defaults to DefaultURL if empty.
	URL string

	// APIKey is the Anthropic API key; must be non-empty.
	APIKey string

	// Model is the model to request; defaults to DefaultModel if empty.
	Model string

	// MaxTokens is the max_tokens value to request; defaults to DefaultMaxTokens if zero.
	MaxTokens int

	// DumpLLM controls whether request/response text is dumped to files
	// for debugging; defaults to false.
	DumpLLM bool
}
| 67 | |
| 68 | var _ llm.Service = (*Service)(nil) |
| 69 | |
// content is a single content block of a message in the Anthropic wire format.
// See https://docs.anthropic.com/en/api/messages
//
// One struct covers every block type; which fields apply depends on Type.
type content struct {
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Text is a pointer so that "no text" (nil, omitted from the JSON) can be
	// distinguished from the empty string (emitted as ""). Empty strings
	// appear often in tool results. Getting this wrong produces API errors
	// that look like one of:
	//
	//	"messages.46.content.0.tool_result.content.0.text.text: Field required"
	//	"messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// There is no great reference for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable, if hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// Fields used by thinking blocks.
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// Fields used by tool_use blocks.
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// Fields used by tool_result blocks.
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`

	// ToolResult holds the nested content of a tool_result block.
	// Note that the structure is recursive; a message looks like:
	//
	//	{
	//	  "role": "user",
	//	  "content": [
	//	    {
	//	      "type": "tool_result",
	//	      "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//	      "content": [
	//	        {"type": "text", "text": "15 degrees"},
	//	        {
	//	          "type": "image",
	//	          "source": {
	//	            "type": "base64",
	//	            "media_type": "image/jpeg",
	//	            "data": "/9j/4AAQSkZJRg...",
	//	          }
	//	        }
	//	      ]
	//	    }
	//	  ]
	//	}
	ToolResult []content `json:"content,omitempty"`

	// Timing information for tool_result; not sent to Claude.
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 129 | |
| 130 | // message represents a message in the conversation. |
| 131 | type message struct { |
| 132 | Role string `json:"role"` |
| 133 | Content []content `json:"content"` |
| 134 | ToolUse *toolUse `json:"tool_use,omitempty"` // use to control whether/which tool to use |
| 135 | } |
| 136 | |
// toolUse identifies a tool use in the message content by ID and tool name.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
| 142 | |
// tool describes a tool made available to Claude.
type tool struct {
	Name string `json:"name"`

	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type string `json:"type,omitempty"`

	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"`
}
| 152 | |
// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates every counter of other, including CostUSD, into u.
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
| 169 | |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 170 | // response represents the response from the message API. |
| 171 | type response struct { |
| 172 | ID string `json:"id"` |
| 173 | Type string `json:"type"` |
| 174 | Role string `json:"role"` |
| 175 | Model string `json:"model"` |
| 176 | Content []content `json:"content"` |
| 177 | StopReason string `json:"stop_reason"` |
| 178 | StopSequence *string `json:"stop_sequence,omitempty"` |
| 179 | Usage usage `json:"usage"` |
| 180 | } |
| 181 | |
// toolChoice is the wire form of the request's tool_choice field.
// Name is omitted from the JSON when empty.
type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}
| 186 | |
// systemContent is one entry of the request's system prompt.
// See https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 193 | |
| 194 | // request represents the request payload for creating a message. |
| 195 | type request struct { |
| 196 | Model string `json:"model"` |
| 197 | Messages []message `json:"messages"` |
| 198 | ToolChoice *toolChoice `json:"tool_choice,omitempty"` |
| 199 | MaxTokens int `json:"max_tokens"` |
| 200 | Tools []*tool `json:"tools,omitempty"` |
| 201 | Stream bool `json:"stream,omitempty"` |
| 202 | System []systemContent `json:"system,omitempty"` |
| 203 | Temperature float64 `json:"temperature,omitempty"` |
| 204 | TopK int `json:"top_k,omitempty"` |
| 205 | TopP float64 `json:"top_p,omitempty"` |
| 206 | StopSequences []string `json:"stop_sequences,omitempty"` |
| 207 | |
| 208 | TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28 |
| 209 | } |
| 210 | |
// mapped applies f to every element of s, in order, and returns the results
// in a new slice. The result is never nil, even when s is nil or empty.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	results := make([]T, 0, len(s))
	for _, elem := range s {
		results = append(results, f(elem))
	}
	return results
}
| 218 | |
// inverted returns a new map with the keys and values of m swapped.
// It panics if two keys of m share the same value, because the inverse
// would then be ambiguous.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	swapped := make(map[V]K)
	for key, val := range m {
		_, seen := swapped[val]
		if seen {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		swapped[val] = key
	}
	return swapped
}
| 229 | |
| 230 | var ( |
| 231 | fromLLMRole = map[llm.MessageRole]string{ |
| 232 | llm.MessageRoleAssistant: "assistant", |
| 233 | llm.MessageRoleUser: "user", |
| 234 | } |
| 235 | toLLMRole = inverted(fromLLMRole) |
| 236 | |
| 237 | fromLLMContentType = map[llm.ContentType]string{ |
| 238 | llm.ContentTypeText: "text", |
| 239 | llm.ContentTypeThinking: "thinking", |
| 240 | llm.ContentTypeRedactedThinking: "redacted_thinking", |
| 241 | llm.ContentTypeToolUse: "tool_use", |
| 242 | llm.ContentTypeToolResult: "tool_result", |
| 243 | } |
| 244 | toLLMContentType = inverted(fromLLMContentType) |
| 245 | |
| 246 | fromLLMToolChoiceType = map[llm.ToolChoiceType]string{ |
| 247 | llm.ToolChoiceTypeAuto: "auto", |
| 248 | llm.ToolChoiceTypeAny: "any", |
| 249 | llm.ToolChoiceTypeNone: "none", |
| 250 | llm.ToolChoiceTypeTool: "tool", |
| 251 | } |
| 252 | |
| 253 | toLLMStopReason = map[string]llm.StopReason{ |
| 254 | "stop_sequence": llm.StopReasonStopSequence, |
| 255 | "max_tokens": llm.StopReasonMaxTokens, |
| 256 | "end_turn": llm.StopReasonEndTurn, |
| 257 | "tool_use": llm.StopReasonToolUse, |
| Josh Bleecher Snyder | 0e8073a | 2025-05-22 21:04:51 -0700 | [diff] [blame] | 258 | "refusal": llm.StopReasonRefusal, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 259 | } |
| 260 | ) |
| 261 | |
// fromLLMCache returns the cache_control JSON for a cache flag:
// an "ephemeral" marker when c is true, and nil (field omitted) otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
| 268 | |
| 269 | func fromLLMContent(c llm.Content) content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 270 | var toolResult []content |
| 271 | if len(c.ToolResult) > 0 { |
| 272 | toolResult = make([]content, len(c.ToolResult)) |
| 273 | for i, tr := range c.ToolResult { |
| 274 | // For image content inside a tool_result, we need to map it to "image" type |
| 275 | if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" { |
| 276 | // Format as an image for Claude |
| 277 | toolResult[i] = content{ |
| 278 | Type: "image", |
| 279 | Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`, |
| 280 | tr.MediaType, tr.Data)), |
| 281 | } |
| 282 | } else { |
| 283 | toolResult[i] = fromLLMContent(tr) |
| 284 | } |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | d := content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 289 | ID: c.ID, |
| 290 | Type: fromLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 291 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 292 | Thinking: c.Thinking, |
| 293 | Data: c.Data, |
| 294 | Signature: c.Signature, |
| 295 | ToolName: c.ToolName, |
| 296 | ToolInput: c.ToolInput, |
| 297 | ToolUseID: c.ToolUseID, |
| 298 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 299 | ToolResult: toolResult, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 300 | CacheControl: fromLLMCache(c.Cache), |
| 301 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 302 | // Anthropic API complains if Text is specified when it shouldn't be |
| 303 | // or not specified when it's the empty string. |
| 304 | if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse { |
| 305 | d.Text = &c.Text |
| 306 | } |
| 307 | return d |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 308 | } |
| 309 | |
| 310 | func fromLLMToolUse(tu *llm.ToolUse) *toolUse { |
| 311 | if tu == nil { |
| 312 | return nil |
| 313 | } |
| 314 | return &toolUse{ |
| 315 | ID: tu.ID, |
| 316 | Name: tu.Name, |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | func fromLLMMessage(msg llm.Message) message { |
| 321 | return message{ |
| 322 | Role: fromLLMRole[msg.Role], |
| 323 | Content: mapped(msg.Content, fromLLMContent), |
| 324 | ToolUse: fromLLMToolUse(msg.ToolUse), |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice { |
| 329 | if tc == nil { |
| 330 | return nil |
| 331 | } |
| 332 | return &toolChoice{ |
| 333 | Type: fromLLMToolChoiceType[tc.Type], |
| 334 | Name: tc.Name, |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | func fromLLMTool(t *llm.Tool) *tool { |
| 339 | return &tool{ |
| 340 | Name: t.Name, |
| 341 | Type: t.Type, |
| 342 | Description: t.Description, |
| 343 | InputSchema: t.InputSchema, |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | func fromLLMSystem(s llm.SystemContent) systemContent { |
| 348 | return systemContent{ |
| 349 | Text: s.Text, |
| 350 | Type: s.Type, |
| 351 | CacheControl: fromLLMCache(s.Cache), |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | func (s *Service) fromLLMRequest(r *llm.Request) *request { |
| 356 | return &request{ |
| 357 | Model: cmp.Or(s.Model, DefaultModel), |
| 358 | Messages: mapped(r.Messages, fromLLMMessage), |
| 359 | MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), |
| 360 | ToolChoice: fromLLMToolChoice(r.ToolChoice), |
| 361 | Tools: mapped(r.Tools, fromLLMTool), |
| 362 | System: mapped(r.System, fromLLMSystem), |
| 363 | } |
| 364 | } |
| 365 | |
| 366 | func toLLMUsage(u usage) llm.Usage { |
| 367 | return llm.Usage{ |
| 368 | InputTokens: u.InputTokens, |
| 369 | CacheCreationInputTokens: u.CacheCreationInputTokens, |
| 370 | CacheReadInputTokens: u.CacheReadInputTokens, |
| 371 | OutputTokens: u.OutputTokens, |
| 372 | CostUSD: u.CostUSD, |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | func toLLMContent(c content) llm.Content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 377 | // Convert toolResult from []content to []llm.Content |
| 378 | var toolResultContents []llm.Content |
| 379 | if len(c.ToolResult) > 0 { |
| 380 | toolResultContents = make([]llm.Content, len(c.ToolResult)) |
| 381 | for i, tr := range c.ToolResult { |
| 382 | toolResultContents[i] = toLLMContent(tr) |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | ret := llm.Content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 387 | ID: c.ID, |
| 388 | Type: toLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 389 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 390 | Thinking: c.Thinking, |
| 391 | Data: c.Data, |
| 392 | Signature: c.Signature, |
| 393 | ToolName: c.ToolName, |
| 394 | ToolInput: c.ToolInput, |
| 395 | ToolUseID: c.ToolUseID, |
| 396 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 397 | ToolResult: toolResultContents, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 398 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 399 | if c.Text != nil { |
| 400 | ret.Text = *c.Text |
| 401 | } |
| 402 | return ret |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 403 | } |
| 404 | |
| 405 | func toLLMResponse(r *response) *llm.Response { |
| 406 | return &llm.Response{ |
| 407 | ID: r.ID, |
| 408 | Type: r.Type, |
| 409 | Role: toLLMRole[r.Role], |
| 410 | Model: r.Model, |
| 411 | Content: mapped(r.Content, toLLMContent), |
| 412 | StopReason: toLLMStopReason[r.StopReason], |
| 413 | StopSequence: r.StopSequence, |
| 414 | Usage: toLLMUsage(r.Usage), |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | // Do sends a request to Anthropic. |
| 419 | func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) { |
| 420 | request := s.fromLLMRequest(ir) |
| 421 | |
| 422 | var payload []byte |
| 423 | var err error |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame^] | 424 | if s.DumpLLM || testing.Testing() { |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 425 | payload, err = json.MarshalIndent(request, "", " ") |
| 426 | } else { |
| 427 | payload, err = json.Marshal(request) |
| 428 | payload = append(payload, '\n') |
| 429 | } |
| 430 | if err != nil { |
| 431 | return nil, err |
| 432 | } |
| 433 | |
| 434 | if false { |
| 435 | fmt.Printf("claude request payload:\n%s\n", payload) |
| 436 | } |
| 437 | |
| 438 | backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute} |
| 439 | largerMaxTokens := false |
| 440 | var partialUsage usage |
| 441 | |
| 442 | url := cmp.Or(s.URL, DefaultURL) |
| 443 | httpc := cmp.Or(s.HTTPC, http.DefaultClient) |
| 444 | |
| 445 | // retry loop |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 446 | var errs error // accumulated errors across all attempts |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 447 | for attempts := 0; ; attempts++ { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 448 | if attempts > 10 { |
| 449 | return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs) |
| 450 | } |
| 451 | if attempts > 0 { |
| 452 | sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second))) |
| 453 | slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts) |
| 454 | time.Sleep(sleep) |
| 455 | } |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame^] | 456 | if s.DumpLLM { |
| 457 | if err := llm.DumpToFile("request", url, payload); err != nil { |
| Josh Bleecher Snyder | e75d0ea | 2025-07-21 23:50:44 +0000 | [diff] [blame] | 458 | slog.WarnContext(ctx, "failed to dump request to file", "error", err) |
| 459 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 460 | } |
| 461 | req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload)) |
| 462 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 463 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 464 | } |
| 465 | |
| 466 | req.Header.Set("Content-Type", "application/json") |
| 467 | req.Header.Set("X-API-Key", s.APIKey) |
| 468 | req.Header.Set("Anthropic-Version", "2023-06-01") |
| 469 | |
| 470 | var features []string |
| 471 | if request.TokenEfficientToolUse { |
| 472 | features = append(features, "token-efficient-tool-use-2025-02-19") |
| 473 | } |
| 474 | if largerMaxTokens { |
| 475 | features = append(features, "output-128k-2025-02-19") |
| 476 | request.MaxTokens = 128 * 1024 |
| 477 | } |
| 478 | if len(features) > 0 { |
| 479 | req.Header.Set("anthropic-beta", strings.Join(features, ",")) |
| 480 | } |
| 481 | |
| 482 | resp, err := httpc.Do(req) |
| 483 | if err != nil { |
| Josh Bleecher Snyder | 3b5646f | 2025-05-23 16:47:53 +0000 | [diff] [blame] | 484 | // Don't retry httprr cache misses |
| 485 | if strings.Contains(err.Error(), "cached HTTP response not found") { |
| 486 | return nil, err |
| 487 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 488 | errs = errors.Join(errs, err) |
| 489 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 490 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 491 | buf, err := io.ReadAll(resp.Body) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 492 | resp.Body.Close() |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 493 | if err != nil { |
| 494 | errs = errors.Join(errs, err) |
| 495 | continue |
| 496 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 497 | |
| 498 | switch { |
| 499 | case resp.StatusCode == http.StatusOK: |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame^] | 500 | if s.DumpLLM { |
| 501 | if err := llm.DumpToFile("response", "", buf); err != nil { |
| Josh Bleecher Snyder | e75d0ea | 2025-07-21 23:50:44 +0000 | [diff] [blame] | 502 | slog.WarnContext(ctx, "failed to dump response to file", "error", err) |
| 503 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 504 | } |
| 505 | var response response |
| 506 | err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response) |
| 507 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 508 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 509 | } |
| 510 | if response.StopReason == "max_tokens" && !largerMaxTokens { |
| Josh Bleecher Snyder | 29fea84 | 2025-05-06 01:51:09 +0000 | [diff] [blame] | 511 | slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size") |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 512 | // Retry with more output tokens. |
| 513 | largerMaxTokens = true |
| Josh Bleecher Snyder | 59bb27d | 2025-06-05 07:32:10 -0700 | [diff] [blame] | 514 | response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 515 | partialUsage = response.Usage |
| 516 | continue |
| 517 | } |
| 518 | |
| 519 | // Calculate and set the cost_usd field |
| 520 | if largerMaxTokens { |
| 521 | response.Usage.Add(partialUsage) |
| 522 | } |
| Josh Bleecher Snyder | 59bb27d | 2025-06-05 07:32:10 -0700 | [diff] [blame] | 523 | response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 524 | |
| 525 | return toLLMResponse(&response), nil |
| 526 | case resp.StatusCode >= 500 && resp.StatusCode < 600: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 527 | // server error, retry |
| 528 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 529 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 530 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 531 | case resp.StatusCode == 429: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 532 | // rate limited, retry |
| 533 | slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf)) |
| 534 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 535 | continue |
| 536 | case resp.StatusCode >= 400 && resp.StatusCode < 500: |
| 537 | // some other 400, probably unrecoverable |
| 538 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 539 | return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 540 | default: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 541 | // ...retry, I guess? |
| 542 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 543 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 544 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 545 | } |
| 546 | } |
| 547 | } |