| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 1 | package ant |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "cmp" |
| 6 | "context" |
| 7 | "encoding/json" |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 8 | "errors" |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 9 | "fmt" |
| 10 | "io" |
| 11 | "log/slog" |
| 12 | "math/rand/v2" |
| 13 | "net/http" |
| 14 | "strings" |
| 15 | "testing" |
| 16 | "time" |
| 17 | |
| 18 | "sketch.dev/llm" |
| 19 | ) |
| 20 | |
const (
	// DefaultModel is the model used when Service.Model is empty.
	DefaultModel = Claude4Sonnet
	// DefaultMaxTokens is the output-token cap used when Service.MaxTokens is zero.
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	// DefaultURL is the Anthropic messages endpoint used when Service.URL is empty.
	DefaultURL = "https://api.anthropic.com/v1/messages"
)
| 28 | |
// Anthropic model identifiers, as accepted by the messages API.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	Claude4Sonnet  = "claude-sonnet-4-20250514"
	Claude4Opus    = "claude-opus-4-20250514"
)
| 36 | |
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	URL       string       // defaults to DefaultURL if empty
	APIKey    string       // must be non-empty
	Model     string       // defaults to DefaultModel if empty
	MaxTokens int          // defaults to DefaultMaxTokens if zero
}

// Compile-time check that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)
| 48 | |
// content is one item in a message's content list, covering every content
// type this client exchanges with the Anthropic messages API (text, image,
// thinking, redacted_thinking, tool_use, tool_result). Which fields are
// populated depends on Type.
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//   "role": "user",
	//   "content": [
	//     {
	//       "type": "tool_result",
	//       "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//       "content": [
	//         {"type": "text", "text": "15 degrees"},
	//         {
	//           "type": "image",
	//           "source": {
	//             "type": "base64",
	//             "media_type": "image/jpeg",
	//             "data": "/9j/4AAQSkZJRg...",
	//           }
	//         }
	//       ]
	//     }
	//   ]
	//}
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl marks a prompt-cache breakpoint; see fromLLMCache.
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
| 108 | |
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant"
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}
| 115 | |
// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
| 121 | |
// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema for the tool's input
}
| 131 | |
// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64 `json:"input_tokens"`
	CacheCreationInputTokens uint64 `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64 `json:"cache_read_input_tokens"`
	OutputTokens             uint64 `json:"output_tokens"`
	// CostUSD is computed locally (see TotalDollars), not returned by the API.
	CostUSD float64 `json:"cost_usd"`
}
| 140 | |
// Add accumulates other's token counts and cost into u.
// It is used to merge usage across retried requests.
func (u *usage) Add(other usage) {
	u.InputTokens += other.InputTokens
	u.CacheCreationInputTokens += other.CacheCreationInputTokens
	u.CacheReadInputTokens += other.CacheReadInputTokens
	u.OutputTokens += other.OutputTokens
	u.CostUSD += other.CostUSD
}
| 148 | |
// errorResponse is the error payload returned by the Anthropic API.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}
| 153 | |
// response represents the response from the message API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"` // e.g. "end_turn", "max_tokens", "tool_use"
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}
| 165 | |
// toolChoice controls whether/which tool Claude may use; see
// https://docs.anthropic.com/en/api/messages#body-tool-choice
type toolChoice struct {
	Type string `json:"type"`           // "auto", "any", "none", or "tool"
	Name string `json:"name,omitempty"` // required when Type is "tool"
}
| 170 | |
// systemContent is one entry of the system prompt.
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // see fromLLMCache
}
| 177 | |
// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	// TokenEfficientToolUse requests the token-efficient-tool-use beta
	// header (see Do); it is never serialized into the request body.
	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
| 194 | |
const dumpText = false // debugging toggle to see raw communications with Claude

// mapped applies f to every element of s and returns the results in order.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	result := make([]T, 0, len(s))
	for _, elem := range s {
		result = append(result, f(elem))
	}
	return result
}
| 204 | |
// inverted returns the value->key inverse of m.
// It panics if two keys map to the same value, since that would make the
// inverse ambiguous.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	out := make(map[V]K, len(m))
	for key, val := range m {
		if _, dup := out[val]; dup {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		out[val] = key
	}
	return out
}
| 215 | |
// Translation tables between llm-package enums and the Anthropic API's wire
// strings. The to* maps are built by inverting the from* maps, so each pair
// stays consistent by construction (inverted panics on duplicate values).
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	// Tool choice types flow only from us to the API, so no inverse is needed.
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	// Stop reasons flow only from the API to us, so no inverse is needed.
	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
		"refusal":       llm.StopReasonRefusal,
	}
)
| 247 | |
// fromLLMCache converts the llm cache flag into the Anthropic
// cache_control payload: an ephemeral cache breakpoint when set,
// nil (omitted from JSON) otherwise.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
| 254 | |
| 255 | func fromLLMContent(c llm.Content) content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 256 | var toolResult []content |
| 257 | if len(c.ToolResult) > 0 { |
| 258 | toolResult = make([]content, len(c.ToolResult)) |
| 259 | for i, tr := range c.ToolResult { |
| 260 | // For image content inside a tool_result, we need to map it to "image" type |
| 261 | if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" { |
| 262 | // Format as an image for Claude |
| 263 | toolResult[i] = content{ |
| 264 | Type: "image", |
| 265 | Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`, |
| 266 | tr.MediaType, tr.Data)), |
| 267 | } |
| 268 | } else { |
| 269 | toolResult[i] = fromLLMContent(tr) |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | d := content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 275 | ID: c.ID, |
| 276 | Type: fromLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 277 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 278 | Thinking: c.Thinking, |
| 279 | Data: c.Data, |
| 280 | Signature: c.Signature, |
| 281 | ToolName: c.ToolName, |
| 282 | ToolInput: c.ToolInput, |
| 283 | ToolUseID: c.ToolUseID, |
| 284 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 285 | ToolResult: toolResult, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 286 | CacheControl: fromLLMCache(c.Cache), |
| 287 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 288 | // Anthropic API complains if Text is specified when it shouldn't be |
| 289 | // or not specified when it's the empty string. |
| 290 | if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse { |
| 291 | d.Text = &c.Text |
| 292 | } |
| 293 | return d |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 294 | } |
| 295 | |
| 296 | func fromLLMToolUse(tu *llm.ToolUse) *toolUse { |
| 297 | if tu == nil { |
| 298 | return nil |
| 299 | } |
| 300 | return &toolUse{ |
| 301 | ID: tu.ID, |
| 302 | Name: tu.Name, |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | func fromLLMMessage(msg llm.Message) message { |
| 307 | return message{ |
| 308 | Role: fromLLMRole[msg.Role], |
| 309 | Content: mapped(msg.Content, fromLLMContent), |
| 310 | ToolUse: fromLLMToolUse(msg.ToolUse), |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice { |
| 315 | if tc == nil { |
| 316 | return nil |
| 317 | } |
| 318 | return &toolChoice{ |
| 319 | Type: fromLLMToolChoiceType[tc.Type], |
| 320 | Name: tc.Name, |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | func fromLLMTool(t *llm.Tool) *tool { |
| 325 | return &tool{ |
| 326 | Name: t.Name, |
| 327 | Type: t.Type, |
| 328 | Description: t.Description, |
| 329 | InputSchema: t.InputSchema, |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | func fromLLMSystem(s llm.SystemContent) systemContent { |
| 334 | return systemContent{ |
| 335 | Text: s.Text, |
| 336 | Type: s.Type, |
| 337 | CacheControl: fromLLMCache(s.Cache), |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | func (s *Service) fromLLMRequest(r *llm.Request) *request { |
| 342 | return &request{ |
| 343 | Model: cmp.Or(s.Model, DefaultModel), |
| 344 | Messages: mapped(r.Messages, fromLLMMessage), |
| 345 | MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), |
| 346 | ToolChoice: fromLLMToolChoice(r.ToolChoice), |
| 347 | Tools: mapped(r.Tools, fromLLMTool), |
| 348 | System: mapped(r.System, fromLLMSystem), |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | func toLLMUsage(u usage) llm.Usage { |
| 353 | return llm.Usage{ |
| 354 | InputTokens: u.InputTokens, |
| 355 | CacheCreationInputTokens: u.CacheCreationInputTokens, |
| 356 | CacheReadInputTokens: u.CacheReadInputTokens, |
| 357 | OutputTokens: u.OutputTokens, |
| 358 | CostUSD: u.CostUSD, |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | func toLLMContent(c content) llm.Content { |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 363 | // Convert toolResult from []content to []llm.Content |
| 364 | var toolResultContents []llm.Content |
| 365 | if len(c.ToolResult) > 0 { |
| 366 | toolResultContents = make([]llm.Content, len(c.ToolResult)) |
| 367 | for i, tr := range c.ToolResult { |
| 368 | toolResultContents[i] = toLLMContent(tr) |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | ret := llm.Content{ |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 373 | ID: c.ID, |
| 374 | Type: toLLMContentType[c.Type], |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 375 | MediaType: c.MediaType, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 376 | Thinking: c.Thinking, |
| 377 | Data: c.Data, |
| 378 | Signature: c.Signature, |
| 379 | ToolName: c.ToolName, |
| 380 | ToolInput: c.ToolInput, |
| 381 | ToolUseID: c.ToolUseID, |
| 382 | ToolError: c.ToolError, |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 383 | ToolResult: toolResultContents, |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 384 | } |
| Philip Zeyliger | 72252cb | 2025-05-10 17:00:08 -0700 | [diff] [blame] | 385 | if c.Text != nil { |
| 386 | ret.Text = *c.Text |
| 387 | } |
| 388 | return ret |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 389 | } |
| 390 | |
| 391 | func toLLMResponse(r *response) *llm.Response { |
| 392 | return &llm.Response{ |
| 393 | ID: r.ID, |
| 394 | Type: r.Type, |
| 395 | Role: toLLMRole[r.Role], |
| 396 | Model: r.Model, |
| 397 | Content: mapped(r.Content, toLLMContent), |
| 398 | StopReason: toLLMStopReason[r.StopReason], |
| 399 | StopSequence: r.StopSequence, |
| 400 | Usage: toLLMUsage(r.Usage), |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | // Do sends a request to Anthropic. |
| 405 | func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) { |
| 406 | request := s.fromLLMRequest(ir) |
| 407 | |
| 408 | var payload []byte |
| 409 | var err error |
| 410 | if dumpText || testing.Testing() { |
| 411 | payload, err = json.MarshalIndent(request, "", " ") |
| 412 | } else { |
| 413 | payload, err = json.Marshal(request) |
| 414 | payload = append(payload, '\n') |
| 415 | } |
| 416 | if err != nil { |
| 417 | return nil, err |
| 418 | } |
| 419 | |
| 420 | if false { |
| 421 | fmt.Printf("claude request payload:\n%s\n", payload) |
| 422 | } |
| 423 | |
| 424 | backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute} |
| 425 | largerMaxTokens := false |
| 426 | var partialUsage usage |
| 427 | |
| 428 | url := cmp.Or(s.URL, DefaultURL) |
| 429 | httpc := cmp.Or(s.HTTPC, http.DefaultClient) |
| 430 | |
| 431 | // retry loop |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 432 | var errs error // accumulated errors across all attempts |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 433 | for attempts := 0; ; attempts++ { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 434 | if attempts > 10 { |
| 435 | return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs) |
| 436 | } |
| 437 | if attempts > 0 { |
| 438 | sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second))) |
| 439 | slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts) |
| 440 | time.Sleep(sleep) |
| 441 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 442 | if dumpText { |
| 443 | fmt.Printf("RAW REQUEST:\n%s\n\n", payload) |
| 444 | } |
| 445 | req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload)) |
| 446 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 447 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 448 | } |
| 449 | |
| 450 | req.Header.Set("Content-Type", "application/json") |
| 451 | req.Header.Set("X-API-Key", s.APIKey) |
| 452 | req.Header.Set("Anthropic-Version", "2023-06-01") |
| 453 | |
| 454 | var features []string |
| 455 | if request.TokenEfficientToolUse { |
| 456 | features = append(features, "token-efficient-tool-use-2025-02-19") |
| 457 | } |
| 458 | if largerMaxTokens { |
| 459 | features = append(features, "output-128k-2025-02-19") |
| 460 | request.MaxTokens = 128 * 1024 |
| 461 | } |
| 462 | if len(features) > 0 { |
| 463 | req.Header.Set("anthropic-beta", strings.Join(features, ",")) |
| 464 | } |
| 465 | |
| 466 | resp, err := httpc.Do(req) |
| 467 | if err != nil { |
| Josh Bleecher Snyder | 3b5646f | 2025-05-23 16:47:53 +0000 | [diff] [blame] | 468 | // Don't retry httprr cache misses |
| 469 | if strings.Contains(err.Error(), "cached HTTP response not found") { |
| 470 | return nil, err |
| 471 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 472 | errs = errors.Join(errs, err) |
| 473 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 474 | } |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 475 | buf, err := io.ReadAll(resp.Body) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 476 | resp.Body.Close() |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 477 | if err != nil { |
| 478 | errs = errors.Join(errs, err) |
| 479 | continue |
| 480 | } |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 481 | |
| 482 | switch { |
| 483 | case resp.StatusCode == http.StatusOK: |
| 484 | if dumpText { |
| 485 | fmt.Printf("RAW RESPONSE:\n%s\n\n", buf) |
| 486 | } |
| 487 | var response response |
| 488 | err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response) |
| 489 | if err != nil { |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 490 | return nil, errors.Join(errs, err) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 491 | } |
| 492 | if response.StopReason == "max_tokens" && !largerMaxTokens { |
| Josh Bleecher Snyder | 29fea84 | 2025-05-06 01:51:09 +0000 | [diff] [blame] | 493 | slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size") |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 494 | // Retry with more output tokens. |
| 495 | largerMaxTokens = true |
| 496 | response.Usage.CostUSD = response.TotalDollars() |
| 497 | partialUsage = response.Usage |
| 498 | continue |
| 499 | } |
| 500 | |
| 501 | // Calculate and set the cost_usd field |
| 502 | if largerMaxTokens { |
| 503 | response.Usage.Add(partialUsage) |
| 504 | } |
| 505 | response.Usage.CostUSD = response.TotalDollars() |
| 506 | |
| 507 | return toLLMResponse(&response), nil |
| 508 | case resp.StatusCode >= 500 && resp.StatusCode < 600: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 509 | // server error, retry |
| 510 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 511 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 512 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 513 | case resp.StatusCode == 429: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 514 | // rate limited, retry |
| 515 | slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf)) |
| 516 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 517 | continue |
| 518 | case resp.StatusCode >= 400 && resp.StatusCode < 500: |
| 519 | // some other 400, probably unrecoverable |
| 520 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 521 | return nil, errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 522 | default: |
| Josh Bleecher Snyder | a4500c9 | 2025-05-15 15:38:32 -0700 | [diff] [blame] | 523 | // ...retry, I guess? |
| 524 | slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode) |
| 525 | errs = errors.Join(errs, fmt.Errorf("status %v: %s", resp.Status, buf)) |
| 526 | continue |
| Josh Bleecher Snyder | 4f84ab7 | 2025-04-22 16:40:54 -0700 | [diff] [blame] | 527 | } |
| 528 | } |
| 529 | } |
| 530 | |
// centsPer1MTokens holds a model's pricing as cents per million tokens,
// broken down by token category.
// (not dollars because i'm twitchy about using floats for money)
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64
	CacheCreation uint64
}
| 539 | |
// modelCost maps each supported model to its pricing.
// TotalDollars panics on models missing from this table, so keep it in
// sync with the model constants above.
// https://www.anthropic.com/pricing#anthropic-api
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude4Opus: {
		Input:         1500, // $15
		Output:        7500, // $75
		CacheRead:     150,  // $1.50
		CacheCreation: 1875, // $18.75
	},
}
| 573 | |
| 574 | // TotalDollars returns the total cost to obtain this response, in dollars. |
| 575 | func (mr *response) TotalDollars() float64 { |
| 576 | cpm, ok := modelCost[mr.Model] |
| 577 | if !ok { |
| 578 | panic(fmt.Sprintf("no pricing info for model: %s", mr.Model)) |
| 579 | } |
| 580 | use := mr.Usage |
| 581 | megaCents := use.InputTokens*cpm.Input + |
| 582 | use.OutputTokens*cpm.Output + |
| 583 | use.CacheReadInputTokens*cpm.CacheRead + |
| 584 | use.CacheCreationInputTokens*cpm.CacheCreation |
| 585 | cents := float64(megaCents) / 1_000_000.0 |
| 586 | return cents / 100.0 |
| 587 | } |