| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 1 | package ant |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "cmp" |
| 6 | "context" |
| 7 | "encoding/json" |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 8 | "errors" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 9 | "fmt" |
| 10 | "io" |
| 11 | "log/slog" |
| 12 | "math/rand/v2" |
| 13 | "net/http" |
| 14 | "strings" |
| 15 | "testing" |
| 16 | "time" |
| 17 | |
| 18 | "sketch.dev/llm" |
| 19 | ) |
| 20 | |
| 21 | const ( |
| Josh Bleecher Snyder | 0efb29d | 2025-05-22 21:05:04 -0700 | [diff] [blame] | 22 | DefaultModel = Claude4Sonnet |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 23 | // See https://docs.anthropic.com/en/docs/about-claude/models/all-models for |
| 24 | // current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19) |
| 25 | DefaultMaxTokens = 8192 |
| Josh Bleecher Snyder | 44dfdce | 2025-07-23 13:02:29 -0700 | [diff] [blame] | 26 | APIKeyEnv = "ANTHROPIC_API_KEY" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 27 | DefaultURL = "https://api.anthropic.com/v1/messages" |
| 28 | ) |
| 29 | |
// Anthropic model identifiers, as sent in the "model" field of a request.
const (
	// Claude 3.5 family.
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"

	// Claude 3.7 family.
	Claude37Sonnet = "claude-3-7-sonnet-20250219"

	// Claude 4 family.
	Claude4Sonnet = "claude-sonnet-4-20250514"
	Claude4Opus   = "claude-opus-4-20250514"
)
| 37 | |
| Josh Bleecher Snyder | d2fe3ba | 2025-07-23 13:05:47 -0700 | [diff] [blame^] | 38 | // IsClaudeModel reports whether userName is a user-friendly Claude model. |
| 39 | // It uses ClaudeModelName under the hood. |
| 40 | func IsClaudeModel(userName string) bool { |
| 41 | return ClaudeModelName(userName) != "" |
| 42 | } |
| 43 | |
| 44 | // ClaudeModelName returns the Anthropic Claude model name for userName. |
| 45 | // It returns an empty string if userName is not a recognized Claude model. |
| 46 | func ClaudeModelName(userName string) string { |
| 47 | switch userName { |
| 48 | case "claude", "sonnet": |
| 49 | return Claude4Sonnet |
| 50 | case "opus": |
| 51 | return Claude4Opus |
| 52 | default: |
| 53 | return "" |
| 54 | } |
| 55 | } |
| 56 | |
| Philip Zeyliger | b8a8f35 | 2025-06-02 07:39:37 -0700 | [diff] [blame] | 57 | // TokenContextWindow returns the maximum token context window size for this service |
| 58 | func (s *Service) TokenContextWindow() int { |
| 59 | model := s.Model |
| 60 | if model == "" { |
| 61 | model = DefaultModel |
| 62 | } |
| 63 | |
| 64 | switch model { |
| 65 | case Claude35Sonnet, Claude37Sonnet: |
| 66 | return 200000 |
| 67 | case Claude35Haiku: |
| 68 | return 200000 |
| 69 | case Claude4Sonnet, Claude4Opus: |
| 70 | return 200000 |
| 71 | default: |
| 72 | // Default for unknown models |
| 73 | return 200000 |
| 74 | } |
| 75 | } |
| 76 | |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 77 | // Service provides Claude completions. |
| 78 | // Fields should not be altered concurrently with calling any method on Service. |
| 79 | type Service struct { |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame] | 80 | HTTPC *http.Client // defaults to http.DefaultClient if nil |
| 81 | URL string // defaults to DefaultURL if empty |
| 82 | APIKey string // must be non-empty |
| 83 | Model string // defaults to DefaultModel if empty |
| 84 | MaxTokens int // defaults to DefaultMaxTokens if zero |
| 85 | DumpLLM bool // whether to dump request/response text to files for debugging; defaults to false |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 86 | } |
| 87 | |
| 88 | var _ llm.Service = (*Service)(nil) |
| 89 | |
// content is one content block of a message in the Anthropic wire format
// (https://docs.anthropic.com/en/api/messages). A single struct covers every
// block type; which fields are meaningful depends on Type.
type content struct {
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Text is a pointer so that "empty string" and "no string" serialize
	// differently: tool results frequently contain an empty string, and the
	// API rejects both a missing required text and an unexpected one, with
	// errors such as:
	//   "messages.46.content.0.tool_result.content.0.text.text: Field required"
	//   "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// No great reference for the API has turned up, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable, if hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// Fields used by thinking blocks.
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// Fields used by tool_use blocks.
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// Fields used by tool_result blocks.
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// ToolResult nests content inside content; on the wire a message looks like:
	//
	//	{
	//	  "role": "user",
	//	  "content": [
	//	    {
	//	      "type": "tool_result",
	//	      "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//	      "content": [
	//	        {"type": "text", "text": "15 degrees"},
	//	        {
	//	          "type": "image",
	//	          "source": {
	//	            "type": "base64",
	//	            "media_type": "image/jpeg",
	//	            "data": "/9j/4AAQSkZJRg...",
	//	          }
	//	        }
	//	      ]
	//	    }
	//	  ]
	//	}
	ToolResult []content `json:"content,omitempty"`

	// Timing information for tool_result; excluded from JSON, so never sent to Claude.
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 149 | |
| 150 | // message represents a message in the conversation. |
| 151 | type message struct { |
| 152 | Role string `json:"role"` |
| 153 | Content []content `json:"content"` |
| 154 | ToolUse *toolUse `json:"tool_use,omitempty"` // use to control whether/which tool to use |
| 155 | } |
| 156 | |
// toolUse identifies a tool use attached to a message, by ID and tool name.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
| 162 | |
// tool describes a tool made available to Claude in a request.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string `json:"type,omitempty"`
	Description string `json:"description,omitempty"`
	// InputSchema is passed through as raw JSON.
	InputSchema json.RawMessage `json:"input_schema,omitempty"`
}
| 172 | |
// usage carries the billing and rate-limit counters reported with a response.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates other into u, field by field (token counts and cost).
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
| 189 | |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 190 | // response represents the response from the message API. |
| 191 | type response struct { |
| 192 | ID string `json:"id"` |
| 193 | Type string `json:"type"` |
| 194 | Role string `json:"role"` |
| 195 | Model string `json:"model"` |
| 196 | Content []content `json:"content"` |
| 197 | StopReason string `json:"stop_reason"` |
| 198 | StopSequence *string `json:"stop_sequence,omitempty"` |
| 199 | Usage usage `json:"usage"` |
| 200 | } |
| 201 | |
// toolChoice is the wire form of a request's tool_choice setting.
type toolChoice struct {
	// Type is one of the strings in fromLLMToolChoiceType.
	Type string `json:"type"`
	// Name is omitted from the JSON when empty.
	Name string `json:"name,omitempty"`
}
| 206 | |
// systemContent is one entry of a request's "system" array; see
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 213 | |
| 214 | // request represents the request payload for creating a message. |
| 215 | type request struct { |
| 216 | Model string `json:"model"` |
| 217 | Messages []message `json:"messages"` |
| 218 | ToolChoice *toolChoice `json:"tool_choice,omitempty"` |
| 219 | MaxTokens int `json:"max_tokens"` |
| 220 | Tools []*tool `json:"tools,omitempty"` |
| 221 | Stream bool `json:"stream,omitempty"` |
| 222 | System []systemContent `json:"system,omitempty"` |
| 223 | Temperature float64 `json:"temperature,omitempty"` |
| 224 | TopK int `json:"top_k,omitempty"` |
| 225 | TopP float64 `json:"top_p,omitempty"` |
| 226 | StopSequences []string `json:"stop_sequences,omitempty"` |
| 227 | |
| 228 | TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28 |
| 229 | } |
| 230 | |
// mapped applies f to each element of s, in order, and returns the results.
// The returned slice is always non-nil and has the same length as s.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	results := make([]T, 0, len(s))
	for idx := range s {
		results = append(results, f(s[idx]))
	}
	return results
}
| 238 | |
// inverted returns a new map with the keys and values of m swapped.
// It panics if two keys of m share a value, since the inverse would then be
// ambiguous.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	reverse := make(map[V]K)
	for key, val := range m {
		_, seen := reverse[val]
		if seen {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		reverse[val] = key
	}
	return reverse
}
| 249 | |
| 250 | var ( |
| 251 | fromLLMRole = map[llm.MessageRole]string{ |
| 252 | llm.MessageRoleAssistant: "assistant", |
| 253 | llm.MessageRoleUser: "user", |
| 254 | } |
| 255 | toLLMRole = inverted(fromLLMRole) |
| 256 | |
| 257 | fromLLMContentType = map[llm.ContentType]string{ |
| 258 | llm.ContentTypeText: "text", |
| 259 | llm.ContentTypeThinking: "thinking", |
| 260 | llm.ContentTypeRedactedThinking: "redacted_thinking", |
| 261 | llm.ContentTypeToolUse: "tool_use", |
| 262 | llm.ContentTypeToolResult: "tool_result", |
| 263 | } |
| 264 | toLLMContentType = inverted(fromLLMContentType) |
| 265 | |
| 266 | fromLLMToolChoiceType = map[llm.ToolChoiceType]string{ |
| 267 | llm.ToolChoiceTypeAuto: "auto", |
| 268 | llm.ToolChoiceTypeAny: "any", |
| 269 | llm.ToolChoiceTypeNone: "none", |
| 270 | llm.ToolChoiceTypeTool: "tool", |
| 271 | } |
| 272 | |
| 273 | toLLMStopReason = map[string]llm.StopReason{ |
| 274 | "stop_sequence": llm.StopReasonStopSequence, |
| 275 | "max_tokens": llm.StopReasonMaxTokens, |
| 276 | "end_turn": llm.StopReasonEndTurn, |
| 277 | "tool_use": llm.StopReasonToolUse, |
| Josh Bleecher Snyder | 0e8073a | 2025-05-22 21:04:51 -0700 | [diff] [blame] | 278 | "refusal": llm.StopReasonRefusal, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 279 | } |
| 280 | ) |
| 281 | |
// fromLLMCache returns the cache_control JSON for a cacheable block: an
// ephemeral marker when c is true, nil (omitted from the output) otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
| 288 | |
| 289 | func fromLLMContent(c llm.Content) content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 290 | var toolResult []content |
| 291 | if len(c.ToolResult) > 0 { |
| 292 | toolResult = make([]content, len(c.ToolResult)) |
| 293 | for i, tr := range c.ToolResult { |
| 294 | // For image content inside a tool_result, we need to map it to "image" type |
| 295 | if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" { |
| 296 | // Format as an image for Claude |
| 297 | toolResult[i] = content{ |
| 298 | Type: "image", |
| 299 | Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`, |
| 300 | tr.MediaType, tr.Data)), |
| 301 | } |
| 302 | } else { |
| 303 | toolResult[i] = fromLLMContent(tr) |
| 304 | } |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | d := content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 309 | ID: c.ID, |
| 310 | Type: fromLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 311 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 312 | Thinking: c.Thinking, |
| 313 | Data: c.Data, |
| 314 | Signature: c.Signature, |
| 315 | ToolName: c.ToolName, |
| 316 | ToolInput: c.ToolInput, |
| 317 | ToolUseID: c.ToolUseID, |
| 318 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 319 | ToolResult: toolResult, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 320 | CacheControl: fromLLMCache(c.Cache), |
| 321 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 322 | // Anthropic API complains if Text is specified when it shouldn't be |
| 323 | // or not specified when it's the empty string. |
| 324 | if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse { |
| 325 | d.Text = &c.Text |
| 326 | } |
| 327 | return d |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 328 | } |
| 329 | |
| 330 | func fromLLMToolUse(tu *llm.ToolUse) *toolUse { |
| 331 | if tu == nil { |
| 332 | return nil |
| 333 | } |
| 334 | return &toolUse{ |
| 335 | ID: tu.ID, |
| 336 | Name: tu.Name, |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | func fromLLMMessage(msg llm.Message) message { |
| 341 | return message{ |
| 342 | Role: fromLLMRole[msg.Role], |
| 343 | Content: mapped(msg.Content, fromLLMContent), |
| 344 | ToolUse: fromLLMToolUse(msg.ToolUse), |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice { |
| 349 | if tc == nil { |
| 350 | return nil |
| 351 | } |
| 352 | return &toolChoice{ |
| 353 | Type: fromLLMToolChoiceType[tc.Type], |
| 354 | Name: tc.Name, |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | func fromLLMTool(t *llm.Tool) *tool { |
| 359 | return &tool{ |
| 360 | Name: t.Name, |
| 361 | Type: t.Type, |
| 362 | Description: t.Description, |
| 363 | InputSchema: t.InputSchema, |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | func fromLLMSystem(s llm.SystemContent) systemContent { |
| 368 | return systemContent{ |
| 369 | Text: s.Text, |
| 370 | Type: s.Type, |
| 371 | CacheControl: fromLLMCache(s.Cache), |
| 372 | } |
| 373 | } |
| 374 | |
| 375 | func (s *Service) fromLLMRequest(r *llm.Request) *request { |
| 376 | return &request{ |
| 377 | Model: cmp.Or(s.Model, DefaultModel), |
| 378 | Messages: mapped(r.Messages, fromLLMMessage), |
| 379 | MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), |
| 380 | ToolChoice: fromLLMToolChoice(r.ToolChoice), |
| 381 | Tools: mapped(r.Tools, fromLLMTool), |
| 382 | System: mapped(r.System, fromLLMSystem), |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | func toLLMUsage(u usage) llm.Usage { |
| 387 | return llm.Usage{ |
| 388 | InputTokens: u.InputTokens, |
| 389 | CacheCreationInputTokens: u.CacheCreationInputTokens, |
| 390 | CacheReadInputTokens: u.CacheReadInputTokens, |
| 391 | OutputTokens: u.OutputTokens, |
| 392 | CostUSD: u.CostUSD, |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | func toLLMContent(c content) llm.Content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 397 | // Convert toolResult from []content to []llm.Content |
| 398 | var toolResultContents []llm.Content |
| 399 | if len(c.ToolResult) > 0 { |
| 400 | toolResultContents = make([]llm.Content, len(c.ToolResult)) |
| 401 | for i, tr := range c.ToolResult { |
| 402 | toolResultContents[i] = toLLMContent(tr) |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | ret := llm.Content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 407 | ID: c.ID, |
| 408 | Type: toLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 409 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 410 | Thinking: c.Thinking, |
| 411 | Data: c.Data, |
| 412 | Signature: c.Signature, |
| 413 | ToolName: c.ToolName, |
| 414 | ToolInput: c.ToolInput, |
| 415 | ToolUseID: c.ToolUseID, |
| 416 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 417 | ToolResult: toolResultContents, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 418 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 419 | if c.Text != nil { |
| 420 | ret.Text = *c.Text |
| 421 | } |
| 422 | return ret |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 423 | } |
| 424 | |
| 425 | func toLLMResponse(r *response) *llm.Response { |
| 426 | return &llm.Response{ |
| 427 | ID: r.ID, |
| 428 | Type: r.Type, |
| 429 | Role: toLLMRole[r.Role], |
| 430 | Model: r.Model, |
| 431 | Content: mapped(r.Content, toLLMContent), |
| 432 | StopReason: toLLMStopReason[r.StopReason], |
| 433 | StopSequence: r.StopSequence, |
| 434 | Usage: toLLMUsage(r.Usage), |
| 435 | } |
| 436 | } |
| 437 | |
| 438 | // Do sends a request to Anthropic. |
| 439 | func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) { |
| 440 | request := s.fromLLMRequest(ir) |
| 441 | |
| 442 | var payload []byte |
| 443 | var err error |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame] | 444 | if s.DumpLLM || testing.Testing() { |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 445 | payload, err = json.MarshalIndent(request, "", " ") |
| 446 | } else { |
| 447 | payload, err = json.Marshal(request) |
| 448 | payload = append(payload, '\n') |
| 449 | } |
| 450 | if err != nil { |
| 451 | return nil, err |
| 452 | } |
| 453 | |
| 454 | if false { |
| 455 | fmt.Printf("claude request payload:\n%s\n", payload) |
| 456 | } |
| 457 | |
| 458 | backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute} |
| 459 | largerMaxTokens := false |
| 460 | var partialUsage usage |
| 461 | |
| 462 | url := cmp.Or(s.URL, DefaultURL) |
| 463 | httpc := cmp.Or(s.HTTPC, http.DefaultClient) |
| 464 | |
| 465 | // retry loop |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 466 | var errs error // accumulated errors across all attempts |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 467 | for attempts := 0; ; attempts++ { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 468 | if attempts > 10 { |
| 469 | return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs) |
| 470 | } |
| 471 | if attempts > 0 { |
| 472 | sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second))) |
| 473 | slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts) |
| 474 | time.Sleep(sleep) |
| 475 | } |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame] | 476 | if s.DumpLLM { |
| 477 | if err := llm.DumpToFile("request", url, payload); err != nil { |
| Josh Bleecher Snyder | e75d0ea | 2025-07-21 23:50:44 +0000 | [diff] [blame] | 478 | slog.WarnContext(ctx, "failed to dump request to file", "error", err) |
| 479 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 480 | } |
| 481 | req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload)) |
| 482 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 483 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 484 | } |
| 485 | |
| 486 | req.Header.Set("Content-Type", "application/json") |
| 487 | req.Header.Set("X-API-Key", s.APIKey) |
| 488 | req.Header.Set("Anthropic-Version", "2023-06-01") |
| 489 | |
| 490 | var features []string |
| 491 | if request.TokenEfficientToolUse { |
| 492 | features = append(features, "token-efficient-tool-use-2025-02-19") |
| 493 | } |
| 494 | if largerMaxTokens { |
| 495 | features = append(features, "output-128k-2025-02-19") |
| 496 | request.MaxTokens = 128 * 1024 |
| 497 | } |
| 498 | if len(features) > 0 { |
| 499 | req.Header.Set("anthropic-beta", strings.Join(features, ",")) |
| 500 | } |
| 501 | |
| 502 | resp, err := httpc.Do(req) |
| 503 | if err != nil { |
| Josh Bleecher Snyder | 3b5646f | 2025-05-23 16:47:53 +0000 | [diff] [blame] | 504 | // Don't retry httprr cache misses |
| 505 | if strings.Contains(err.Error(), "cached HTTP response not found") { |
| 506 | return nil, err |
| 507 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 508 | errs = errors.Join(errs, err) |
| 509 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 510 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 511 | buf, err := io.ReadAll(resp.Body) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 512 | resp.Body.Close() |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 513 | if err != nil { |
| 514 | errs = errors.Join(errs, err) |
| 515 | continue |
| 516 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 517 | |
| 518 | switch { |
| 519 | case resp.StatusCode == http.StatusOK: |
| Josh Bleecher Snyder | 57afbca | 2025-07-23 13:29:59 -0700 | [diff] [blame] | 520 | if s.DumpLLM { |
| 521 | if err := llm.DumpToFile("response", "", buf); err != nil { |
| Josh Bleecher Snyder | e75d0ea | 2025-07-21 23:50:44 +0000 | [diff] [blame] | 522 | slog.WarnContext(ctx, "failed to dump response to file", "error", err) |
| 523 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 524 | } |
| 525 | var response response |
| 526 | err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response) |
| 527 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 528 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 529 | } |
| 530 | if response.StopReason == "max_tokens" && !largerMaxTokens { |
| Josh Bleecher Snyder | 29fea84 | 2025-05-06 01:51:09 +0000 | [diff] [blame] | 531 | slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size") |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 532 | // Retry with more output tokens. |
| 533 | largerMaxTokens = true |
| Josh Bleecher Snyder | 59bb27d | 2025-06-05 07:32:10 -0700 | [diff] [blame] | 534 | response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 535 | partialUsage = response.Usage |
| 536 | continue |
| 537 | } |
| 538 | |
| 539 | // Calculate and set the cost_usd field |
| 540 | if largerMaxTokens { |
| 541 | response.Usage.Add(partialUsage) |
| 542 | } |
| Josh Bleecher Snyder | 59bb27d | 2025-06-05 07:32:10 -0700 | [diff] [blame] | 543 | response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 544 | |
| 545 | return toLLMResponse(&response), nil |
| 546 | case resp.StatusCode >= 500 && resp.StatusCode < 600: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 547 | // server error, retry |
| 548 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 549 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 550 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 551 | case resp.StatusCode == 429: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 552 | // rate limited, retry |
| 553 | slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf)) |
| 554 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 555 | continue |
| 556 | case resp.StatusCode >= 400 && resp.StatusCode < 500: |
| 557 | // some other 400, probably unrecoverable |
| 558 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 559 | return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 560 | default: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 561 | // ...retry, I guess? |
| 562 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 563 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 564 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 565 | } |
| 566 | } |
| 567 | } |