// llm/ant/ant.go - sketch - Gitiles

 package ant

 import (
 	"bytes"
 	"cmp"
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"math/rand/v2"
 	"net/http"
 	"strings"
 	"testing"
 	"time"

 	"sketch.dev/llm"
 )

 // Defaults used by Service when the corresponding field is left at its zero
 // value: DefaultModel for Model, DefaultMaxTokens for MaxTokens, and DefaultURL
 // for URL.
 const (
 	DefaultModel = Claude37Sonnet
 	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
 	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
 	DefaultMaxTokens = 8192
 	DefaultURL       = "https://api.anthropic.com/v1/messages"
 )

 // Model identifier strings. Each one has a pricing entry in modelCost.
 const (
 	Claude35Sonnet = "claude-3-5-sonnet-20241022"
 	Claude35Haiku  = "claude-3-5-haiku-20241022"
 	Claude37Sonnet = "claude-3-7-sonnet-20250219"
 )

 // Service provides Claude completions.
 // Fields should not be altered concurrently with calling any method on Service.
 //
 // Zero-valued optional fields are replaced by their defaults each time a
 // request is built or sent; the struct itself is never modified.
 type Service struct {
 	HTTPC     *http.Client // defaults to http.DefaultClient if nil
 	URL       string       // defaults to DefaultURL if empty
 	APIKey    string       // must be non-empty
 	Model     string       // defaults to DefaultModel if empty
 	MaxTokens int          // defaults to DefaultMaxTokens if zero
 }

 // Compile-time check that *Service satisfies llm.Service.
 var _ llm.Service = (*Service)(nil)

 // content is a single content block of a message as exchanged with the API.
 // One struct covers every block type handled here (text, thinking,
 // redacted_thinking, tool_use, tool_result); fields that do not apply to a
 // given Type are left at their zero value and omitted from the JSON.
 type content struct {
 	// TODO: image support?
 	// https://docs.anthropic.com/en/api/messages
 	ID   string `json:"id,omitempty"`
 	Type string `json:"type,omitempty"`
 	Text string `json:"text,omitempty"`

 	// for thinking
 	Thinking  string `json:"thinking,omitempty"`
 	Data      string `json:"data,omitempty"`      // for redacted_thinking
 	Signature string `json:"signature,omitempty"` // for thinking

 	// for tool_use
 	ToolName  string          `json:"name,omitempty"`
 	ToolInput json.RawMessage `json:"input,omitempty"`

 	// for tool_result
 	ToolUseID  string `json:"tool_use_id,omitempty"`
 	ToolError  bool   `json:"is_error,omitempty"`
 	ToolResult string `json:"content,omitempty"`

 	// timing information for tool_result; not sent to Claude
 	StartTime *time.Time `json:"-"`
 	EndTime   *time.Time `json:"-"`

 	CacheControl json.RawMessage `json:"cache_control,omitempty"`
 }

 // message represents a message in the conversation.
 // Role is the wire role string ("assistant" or "user", see fromLLMRole) and
 // Content holds the message's content blocks in order.
 type message struct {
 	Role    string    `json:"role"`
 	Content []content `json:"content"`
 	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
 }

 // toolUse represents a tool use in the message content.
 // It carries only the tool-use ID and the tool name.
 type toolUse struct {
 	ID   string `json:"id"`
 	Name string `json:"name"`
 }

 // tool represents a tool available to Claude.
 // InputSchema is passed through as raw JSON without being inspected here.
 type tool struct {
 	Name string `json:"name"`
 	// Type is used by the text editor tool; see
 	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
 	Type        string          `json:"type,omitempty"`
 	Description string          `json:"description,omitempty"`
 	InputSchema json.RawMessage `json:"input_schema,omitempty"`
 }

 // usage represents the billing and rate-limit usage.
 // The token counters are decoded from the API response; CostUSD is filled in
 // locally by Do from response.TotalDollars.
 type usage struct {
 	InputTokens              uint64  `json:"input_tokens"`
 	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
 	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
 	OutputTokens             uint64  `json:"output_tokens"`
 	CostUSD                  float64 `json:"cost_usd"`
 }

 // Add accumulates other into u field by field: the four token counters and
 // CostUSD are each incremented by the matching field of other.
 func (u *usage) Add(other usage) {
 	u.CostUSD += other.CostUSD
 	u.OutputTokens += other.OutputTokens
 	u.CacheReadInputTokens += other.CacheReadInputTokens
 	u.CacheCreationInputTokens += other.CacheCreationInputTokens
 	u.InputTokens += other.InputTokens
 }

 // errorResponse holds the type and message of an API error.
 // It is not referenced elsewhere in this file; the TODO in Do mentions
 // parsing error responses in the future.
 type errorResponse struct {
 	Type    string `json:"type"`
 	Message string `json:"message"`
 }

 // response represents the response from the message API.
 // Do decodes a 200 response body into this struct and then converts it with
 // toLLMResponse. Model is also the lookup key used by TotalDollars.
 type response struct {
 	ID           string    `json:"id"`
 	Type         string    `json:"type"`
 	Role         string    `json:"role"`
 	Model        string    `json:"model"`
 	Content      []content `json:"content"`
 	StopReason   string    `json:"stop_reason"`
 	StopSequence *string   `json:"stop_sequence,omitempty"`
 	Usage        usage     `json:"usage"`
 }

 // toolChoice is the wire form of the request's tool_choice setting.
 // Type is one of the strings in fromLLMToolChoiceType; Name is omitted from
 // the JSON when empty.
 type toolChoice struct {
 	Type string `json:"type"`
 	Name string `json:"name,omitempty"`
 }

 // systemContent is one entry of the request's system prompt array.
 // https://docs.anthropic.com/en/api/messages#body-system
 type systemContent struct {
 	Text         string          `json:"text,omitempty"`
 	Type         string          `json:"type,omitempty"`
 	CacheControl json.RawMessage `json:"cache_control,omitempty"`
 }

 // request represents the request payload for creating a message.
 // fromLLMRequest populates Model, Messages, MaxTokens, ToolChoice, Tools and
 // System; the remaining fields stay at their zero value unless set elsewhere.
 // TokenEfficientToolUse is never serialized into the body: Do turns it into
 // an anthropic-beta header instead.
 type request struct {
 	Model         string          `json:"model"`
 	Messages      []message       `json:"messages"`
 	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
 	MaxTokens     int             `json:"max_tokens"`
 	Tools         []*tool         `json:"tools,omitempty"`
 	Stream        bool            `json:"stream,omitempty"`
 	System        []systemContent `json:"system,omitempty"`
 	Temperature   float64         `json:"temperature,omitempty"`
 	TopK          int             `json:"top_k,omitempty"`
 	TopP          float64         `json:"top_p,omitempty"`
 	StopSequences []string        `json:"stop_sequences,omitempty"`

 	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
 }

 // dumpText, when true, makes Do print the raw request and response bodies to
 // stdout and marshal the request with indentation.
 const dumpText = false // debugging toggle to see raw communications with Claude

 // mapped applies f to every element of s, in order, and returns the results
 // in a new slice of the same length. The returned slice is always non-nil,
 // even when s is nil or empty.
 func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
 	result := make([]T, 0, len(s))
 	for i := range s {
 		result = append(result, f(s[i]))
 	}
 	return result
 }

 // inverted returns a new map with the keys and values of m swapped.
 // It panics if two keys of m share the same value, because the inverse would
 // then be ambiguous.
 func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
 	swapped := make(map[V]K)
 	for key, val := range m {
 		_, seen := swapped[val]
 		if seen {
 			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
 		}
 		swapped[val] = key
 	}
 	return swapped
 }

 // Translation tables between the llm package's enums and the strings used on
 // the wire. The fromLLM* maps go from llm values to wire strings; the toLLM*
 // maps go the other way. toLLMRole and toLLMContentType are derived with
 // inverted, which panics during package initialization if two keys share a
 // value. A lookup with a key missing from a table yields the zero value.
 var (
 	fromLLMRole = map[llm.MessageRole]string{
 		llm.MessageRoleAssistant: "assistant",
 		llm.MessageRoleUser:      "user",
 	}
 	toLLMRole = inverted(fromLLMRole)

 	fromLLMContentType = map[llm.ContentType]string{
 		llm.ContentTypeText:             "text",
 		llm.ContentTypeThinking:         "thinking",
 		llm.ContentTypeRedactedThinking: "redacted_thinking",
 		llm.ContentTypeToolUse:          "tool_use",
 		llm.ContentTypeToolResult:       "tool_result",
 	}
 	toLLMContentType = inverted(fromLLMContentType)

 	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
 		llm.ToolChoiceTypeAuto: "auto",
 		llm.ToolChoiceTypeAny:  "any",
 		llm.ToolChoiceTypeNone: "none",
 		llm.ToolChoiceTypeTool: "tool",
 	}

 	toLLMStopReason = map[string]llm.StopReason{
 		"stop_sequence": llm.StopReasonStopSequence,
 		"max_tokens":    llm.StopReasonMaxTokens,
 		"end_turn":      llm.StopReasonEndTurn,
 		"tool_use":      llm.StopReasonToolUse,
 	}
 )

 // fromLLMCache returns the cache_control JSON for a block: the ephemeral
 // marker when c is true, nil (so the field is omitted) when c is false.
 func fromLLMCache(c bool) json.RawMessage {
 	if c {
 		return json.RawMessage(`{"type":"ephemeral"}`)
 	}
 	return nil
 }

 // fromLLMContent converts an llm.Content into its wire form. The type is
 // translated through fromLLMContentType and the cache flag through
 // fromLLMCache; StartTime and EndTime are left unset.
 func fromLLMContent(c llm.Content) content {
 	var out content
 	out.ID = c.ID
 	out.Type = fromLLMContentType[c.Type]
 	out.Text = c.Text
 	out.Thinking = c.Thinking
 	out.Data = c.Data
 	out.Signature = c.Signature
 	out.ToolName = c.ToolName
 	out.ToolInput = c.ToolInput
 	out.ToolUseID = c.ToolUseID
 	out.ToolError = c.ToolError
 	out.ToolResult = c.ToolResult
 	out.CacheControl = fromLLMCache(c.Cache)
 	return out
 }

 // fromLLMToolUse converts an optional llm.ToolUse into its wire form,
 // returning nil when tu is nil.
 func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
 	if tu == nil {
 		return nil
 	}
 	out := toolUse{ID: tu.ID, Name: tu.Name}
 	return &out
 }

 // fromLLMMessage converts an llm.Message into its wire form, translating the
 // role and converting every content block and the optional tool use.
 func fromLLMMessage(msg llm.Message) message {
 	var out message
 	out.Role = fromLLMRole[msg.Role]
 	out.Content = mapped(msg.Content, fromLLMContent)
 	out.ToolUse = fromLLMToolUse(msg.ToolUse)
 	return out
 }

 // fromLLMToolChoice converts an optional llm.ToolChoice into its wire form,
 // returning nil when tc is nil.
 func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
 	if tc == nil {
 		return nil
 	}
 	out := toolChoice{Name: tc.Name}
 	out.Type = fromLLMToolChoiceType[tc.Type]
 	return &out
 }

 // fromLLMTool copies the name, type, description and input schema of t into
 // a newly allocated wire-format tool. t must be non-nil.
 func fromLLMTool(t *llm.Tool) *tool {
 	out := new(tool)
 	out.Name = t.Name
 	out.Type = t.Type
 	out.Description = t.Description
 	out.InputSchema = t.InputSchema
 	return out
 }

 // fromLLMSystem converts one llm.SystemContent entry into its wire form,
 // translating the cache flag through fromLLMCache.
 func fromLLMSystem(s llm.SystemContent) systemContent {
 	var out systemContent
 	out.Text = s.Text
 	out.Type = s.Type
 	out.CacheControl = fromLLMCache(s.Cache)
 	return out
 }

 // fromLLMRequest builds the wire request for r. Model and MaxTokens come
 // from the Service, falling back to DefaultModel and DefaultMaxTokens when
 // unset; messages, tool choice, tools and system prompt are converted from r.
 func (s *Service) fromLLMRequest(r *llm.Request) *request {
 	model := cmp.Or(s.Model, DefaultModel)
 	maxTokens := cmp.Or(s.MaxTokens, DefaultMaxTokens)

 	out := &request{Model: model, MaxTokens: maxTokens}
 	out.Messages = mapped(r.Messages, fromLLMMessage)
 	out.ToolChoice = fromLLMToolChoice(r.ToolChoice)
 	out.Tools = mapped(r.Tools, fromLLMTool)
 	out.System = mapped(r.System, fromLLMSystem)
 	return out
 }

 // toLLMUsage copies the token counters and cost of u into an llm.Usage.
 func toLLMUsage(u usage) llm.Usage {
 	var out llm.Usage
 	out.InputTokens = u.InputTokens
 	out.CacheCreationInputTokens = u.CacheCreationInputTokens
 	out.CacheReadInputTokens = u.CacheReadInputTokens
 	out.OutputTokens = u.OutputTokens
 	out.CostUSD = u.CostUSD
 	return out
 }

 // toLLMContent converts a wire-format content block into an llm.Content.
 // The type string is translated through toLLMContentType; cache control and
 // timing fields are not carried over.
 func toLLMContent(c content) llm.Content {
 	var out llm.Content
 	out.ID = c.ID
 	out.Type = toLLMContentType[c.Type]
 	out.Text = c.Text
 	out.Thinking = c.Thinking
 	out.Data = c.Data
 	out.Signature = c.Signature
 	out.ToolName = c.ToolName
 	out.ToolInput = c.ToolInput
 	out.ToolUseID = c.ToolUseID
 	out.ToolError = c.ToolError
 	out.ToolResult = c.ToolResult
 	return out
 }

 // toLLMResponse converts a decoded API response into a newly allocated
 // llm.Response, translating role, stop reason, content blocks and usage.
 // r must be non-nil.
 func toLLMResponse(r *response) *llm.Response {
 	out := new(llm.Response)
 	out.ID = r.ID
 	out.Type = r.Type
 	out.Role = toLLMRole[r.Role]
 	out.Model = r.Model
 	out.Content = mapped(r.Content, toLLMContent)
 	out.StopReason = toLLMStopReason[r.StopReason]
 	out.StopSequence = r.StopSequence
 	out.Usage = toLLMUsage(r.Usage)
 	return out
 }

 // Do sends ir to the Anthropic messages endpoint and returns the converted
 // response.
 //
 // 5xx and 429 responses are retried without an attempt limit, sleeping
 // between attempts; the sleep ends early with ctx's error if ctx is
 // cancelled. When a response stops with "max_tokens", the call is retried
 // once with the output-128k beta enabled and max_tokens raised to 128k, and
 // the usage of the truncated attempt is added to that of the final response.
 // Any other non-200 status is returned as an error containing the status and
 // body.
 func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
 	apiReq := s.fromLLMRequest(ir)

 	// encode marshals the current state of apiReq. It is called again whenever
 	// apiReq changes so the bytes on the wire always match the struct.
 	encode := func() ([]byte, error) {
 		if dumpText || testing.Testing() {
 			return json.MarshalIndent(apiReq, "", " ")
 		}
 		b, err := json.Marshal(apiReq)
 		if err != nil {
 			return nil, err
 		}
 		return append(b, '\n'), nil
 	}
 	payload, err := encode()
 	if err != nil {
 		return nil, err
 	}

 	// pause sleeps for d but gives up as soon as ctx is done, so a cancelled
 	// caller is not held for the remainder of a back-off interval.
 	pause := func(d time.Duration) error {
 		timer := time.NewTimer(d)
 		defer timer.Stop()
 		select {
 		case <-ctx.Done():
 			return ctx.Err()
 		case <-timer.C:
 			return nil
 		}
 	}

 	backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
 	largerMaxTokens := false
 	var partialUsage usage

 	url := cmp.Or(s.URL, DefaultURL)
 	httpc := cmp.Or(s.HTTPC, http.DefaultClient)

 	// retry loop
 	for attempts := 0; ; attempts++ {
 		if dumpText {
 			fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
 		}
 		req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
 		if err != nil {
 			return nil, err
 		}

 		req.Header.Set("Content-Type", "application/json")
 		req.Header.Set("X-API-Key", s.APIKey)
 		req.Header.Set("Anthropic-Version", "2023-06-01")

 		var features []string
 		if apiReq.TokenEfficientToolUse {
 			features = append(features, "token-efficient-tool-use-2025-02-19")
 		}
 		if largerMaxTokens {
 			features = append(features, "output-128k-2025-02-19")
 		}
 		if len(features) > 0 {
 			req.Header.Set("anthropic-beta", strings.Join(features, ","))
 		}

 		resp, err := httpc.Do(req)
 		if err != nil {
 			return nil, err
 		}
 		buf, readErr := io.ReadAll(resp.Body)
 		resp.Body.Close()

 		switch {
 		case resp.StatusCode == http.StatusOK:
 			// A truncated success body cannot be trusted; error bodies are
 			// only logged, so a partial read is tolerated for those.
 			if readErr != nil {
 				return nil, fmt.Errorf("reading response body: %w", readErr)
 			}
 			if dumpText {
 				fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
 			}
 			var res response
 			if err := json.NewDecoder(bytes.NewReader(buf)).Decode(&res); err != nil {
 				return nil, err
 			}
 			if res.StopReason == "max_tokens" && !largerMaxTokens {
 				slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
 				// Retry with more output tokens. The payload must be
 				// re-encoded, otherwise the retry would resend the old limit.
 				largerMaxTokens = true
 				apiReq.MaxTokens = 128 * 1024
 				if payload, err = encode(); err != nil {
 					return nil, err
 				}
 				res.Usage.CostUSD = res.TotalDollars()
 				partialUsage = res.Usage
 				continue
 			}

 			// Calculate and set the cost_usd field
 			if largerMaxTokens {
 				res.Usage.Add(partialUsage)
 			}
 			res.Usage.CostUSD = res.TotalDollars()

 			return toLLMResponse(&res), nil
 		case resp.StatusCode >= 500 && resp.StatusCode < 600:
 			// overloaded or unhappy, in one form or another
 			sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
 			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "sleep", sleep)
 			if err := pause(sleep); err != nil {
 				return nil, err
 			}
 		case resp.StatusCode == 429:
 			// rate limited. wait 1 minute as a starting point, because that's the rate limiting window.
 			// and then add some additional time for backoff.
 			sleep := time.Minute + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
 			slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "sleep", sleep)
 			if err := pause(sleep); err != nil {
 				return nil, err
 			}
 		// case resp.StatusCode == 400:
 		// TODO: parse ErrorResponse, make (*ErrorResponse) implement error
 		default:
 			return nil, fmt.Errorf("API request failed with status %s\n%s", resp.Status, buf)
 		}
 	}
 }

 // centsPer1MTokens holds the price of each token category for one model,
 // expressed in
 // cents per million tokens
 // (not dollars because i'm twitchy about using floats for money)
 type centsPer1MTokens struct {
 	Input         uint64
 	Output        uint64
 	CacheRead     uint64
 	CacheCreation uint64
 }

 // modelCost maps a model identifier to its per-category token prices.
 // TotalDollars panics for any model that is missing from this table.
 // https://www.anthropic.com/pricing#anthropic-api
 var modelCost = map[string]centsPer1MTokens{
 	Claude37Sonnet: {
 		Input:         300,  // $3
 		Output:        1500, // $15
 		CacheRead:     30,   // $0.30
 		CacheCreation: 375,  // $3.75
 	},
 	Claude35Haiku: {
 		Input:         80,  // $0.80
 		Output:        400, // $4.00
 		CacheRead:     8,   // $0.08
 		CacheCreation: 100, // $1.00
 	},
 	Claude35Sonnet: {
 		Input:         300,  // $3
 		Output:        1500, // $15
 		CacheRead:     30,   // $0.30
 		CacheCreation: 375,  // $3.75
 	},
 }

 // TotalDollars returns the total cost to obtain this response, in dollars.
 // Each token counter in mr.Usage is multiplied by the matching rate from
 // modelCost and the sum is converted from cents-per-million to dollars.
 // It panics if mr.Model has no entry in modelCost.
 func (mr *response) TotalDollars() float64 {
 	rates, known := modelCost[mr.Model]
 	if !known {
 		panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
 	}
 	// Accumulate in integer "cents x 1e6" so no precision is lost before the
 	// final conversion.
 	var megaCents uint64
 	megaCents += mr.Usage.InputTokens * rates.Input
 	megaCents += mr.Usage.OutputTokens * rates.Output
 	megaCents += mr.Usage.CacheReadInputTokens * rates.CacheRead
 	megaCents += mr.Usage.CacheCreationInputTokens * rates.CacheCreation
 	return float64(megaCents) / 1_000_000.0 / 100.0
 }
	package ant

	import (
	"bytes"
	"cmp"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"math/rand/v2"
	"net/http"
	"strings"
	"testing"
	"time"

	"sketch.dev/llm"
	)

	// Defaults used by Service when the corresponding field is left at its zero
	// value: DefaultModel for Model, DefaultMaxTokens for MaxTokens, and DefaultURL
	// for URL.
	const (
	DefaultModel = Claude37Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	DefaultURL = "https://api.anthropic.com/v1/messages"
	)

	// Model identifier strings. Each one has a pricing entry in modelCost.
	const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	)

	// Service provides Claude completions.
	// Fields should not be altered concurrently with calling any method on Service.
	//
	// Zero-valued optional fields are replaced by their defaults each time a
	// request is built or sent; the struct itself is never modified.
	type Service struct {
	HTTPC *http.Client // defaults to http.DefaultClient if nil
	URL string // defaults to DefaultURL if empty
	APIKey string // must be non-empty
	Model string // defaults to DefaultModel if empty
	MaxTokens int // defaults to DefaultMaxTokens if zero
	}

	// Compile-time check that *Service satisfies llm.Service.
	var _ llm.Service = (*Service)(nil)

	// content is a single content block of a message as exchanged with the API.
	// One struct covers every block type handled here (text, thinking,
	// redacted_thinking, tool_use, tool_result); fields that do not apply to a
	// given Type are left at their zero value and omitted from the JSON.
	type content struct {
	// TODO: image support?
	// https://docs.anthropic.com/en/api/messages
	ID string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`
	Text string `json:"text,omitempty"`

	// for thinking
	Thinking string `json:"thinking,omitempty"`
	Data string `json:"data,omitempty"` // for redacted_thinking
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName string `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool `json:"is_error,omitempty"`
	ToolResult string `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
	}

	// message represents a message in the conversation.
	// Role is the wire role string ("assistant" or "user", see fromLLMRole) and
	// Content holds the message's content blocks in order.
	type message struct {
	Role string `json:"role"`
	Content []content `json:"content"`
	ToolUse *toolUse `json:"tool_use,omitempty"` // use to control whether/which tool to use
	}

	// toolUse represents a tool use in the message content.
	// It carries only the tool-use ID and the tool name.
	type toolUse struct {
	ID string `json:"id"`
	Name string `json:"name"`
	}

	// tool represents a tool available to Claude.
	// InputSchema is passed through as raw JSON without being inspected here.
	type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type string `json:"type,omitempty"`
	Description string `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"`
	}

	// usage represents the billing and rate-limit usage.
	// The token counters are decoded from the API response; CostUSD is filled in
	// locally by Do from response.TotalDollars.
	type usage struct {
	InputTokens uint64 `json:"input_tokens"`
	CacheCreationInputTokens uint64 `json:"cache_creation_input_tokens"`
	CacheReadInputTokens uint64 `json:"cache_read_input_tokens"`
	OutputTokens uint64 `json:"output_tokens"`
	CostUSD float64 `json:"cost_usd"`
	}

	// Add accumulates other into u field by field: the four token counters and
	// CostUSD are each incremented by the matching field of other.
	func (u *usage) Add(other usage) {
		u.CostUSD += other.CostUSD
		u.OutputTokens += other.OutputTokens
		u.CacheReadInputTokens += other.CacheReadInputTokens
		u.CacheCreationInputTokens += other.CacheCreationInputTokens
		u.InputTokens += other.InputTokens
	}

	// errorResponse holds the type and message of an API error.
	// It is not referenced elsewhere in this file; the TODO in Do mentions
	// parsing error responses in the future.
	type errorResponse struct {
	Type string `json:"type"`
	Message string `json:"message"`
	}

	// response represents the response from the message API.
	// Do decodes a 200 response body into this struct and then converts it with
	// toLLMResponse. Model is also the lookup key used by TotalDollars.
	type response struct {
	ID string `json:"id"`
	Type string `json:"type"`
	Role string `json:"role"`
	Model string `json:"model"`
	Content []content `json:"content"`
	StopReason string `json:"stop_reason"`
	StopSequence *string `json:"stop_sequence,omitempty"`
	Usage usage `json:"usage"`
	}

	// toolChoice is the wire form of the request's tool_choice setting.
	// Type is one of the strings in fromLLMToolChoiceType; Name is omitted from
	// the JSON when empty.
	type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
	}

	// systemContent is one entry of the request's system prompt array.
	// https://docs.anthropic.com/en/api/messages#body-system
	type systemContent struct {
	Text string `json:"text,omitempty"`
	Type string `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
	}

	// request represents the request payload for creating a message.
	// fromLLMRequest populates Model, Messages, MaxTokens, ToolChoice, Tools and
	// System; the remaining fields stay at their zero value unless set elsewhere.
	// TokenEfficientToolUse is never serialized into the body: Do turns it into
	// an anthropic-beta header instead.
	type request struct {
	Model string `json:"model"`
	Messages []message `json:"messages"`
	ToolChoice *toolChoice `json:"tool_choice,omitempty"`
	MaxTokens int `json:"max_tokens"`
	Tools []*tool `json:"tools,omitempty"`
	Stream bool `json:"stream,omitempty"`
	System []systemContent `json:"system,omitempty"`
	Temperature float64 `json:"temperature,omitempty"`
	TopK int `json:"top_k,omitempty"`
	TopP float64 `json:"top_p,omitempty"`
	StopSequences []string `json:"stop_sequences,omitempty"`

	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
	}

	// dumpText, when true, makes Do print the raw request and response bodies to
	// stdout and marshal the request with indentation.
	const dumpText = false // debugging toggle to see raw communications with Claude

	// mapped applies f to every element of s, in order, and returns the results
	// in a new slice of the same length. The returned slice is always non-nil,
	// even when s is nil or empty.
	func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
		result := make([]T, 0, len(s))
		for i := range s {
			result = append(result, f(s[i]))
		}
		return result
	}

	// inverted returns a new map with the keys and values of m swapped.
	// It panics if two keys of m share the same value, because the inverse would
	// then be ambiguous.
	func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
		swapped := make(map[V]K)
		for key, val := range m {
			_, seen := swapped[val]
			if seen {
				panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
			}
			swapped[val] = key
		}
		return swapped
	}

	// Translation tables between the llm package's enums and the strings used on
	// the wire. The fromLLM* maps go from llm values to wire strings; the toLLM*
	// maps go the other way. toLLMRole and toLLMContentType are derived with
	// inverted, which panics during package initialization if two keys share a
	// value. A lookup with a key missing from a table yields the zero value.
	var (
	fromLLMRole = map[llm.MessageRole]string{
	llm.MessageRoleAssistant: "assistant",
	llm.MessageRoleUser: "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
	llm.ContentTypeText: "text",
	llm.ContentTypeThinking: "thinking",
	llm.ContentTypeRedactedThinking: "redacted_thinking",
	llm.ContentTypeToolUse: "tool_use",
	llm.ContentTypeToolResult: "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
	llm.ToolChoiceTypeAuto: "auto",
	llm.ToolChoiceTypeAny: "any",
	llm.ToolChoiceTypeNone: "none",
	llm.ToolChoiceTypeTool: "tool",
	}

	toLLMStopReason = map[string]llm.StopReason{
	"stop_sequence": llm.StopReasonStopSequence,
	"max_tokens": llm.StopReasonMaxTokens,
	"end_turn": llm.StopReasonEndTurn,
	"tool_use": llm.StopReasonToolUse,
	}
	)

	// fromLLMCache returns the cache_control JSON for a block: the ephemeral
	// marker when c is true, nil (so the field is omitted) when c is false.
	func fromLLMCache(c bool) json.RawMessage {
		if c {
			return json.RawMessage(`{"type":"ephemeral"}`)
		}
		return nil
	}

	// fromLLMContent converts an llm.Content into its wire form. The type is
	// translated through fromLLMContentType and the cache flag through
	// fromLLMCache; StartTime and EndTime are left unset.
	func fromLLMContent(c llm.Content) content {
		var out content
		out.ID = c.ID
		out.Type = fromLLMContentType[c.Type]
		out.Text = c.Text
		out.Thinking = c.Thinking
		out.Data = c.Data
		out.Signature = c.Signature
		out.ToolName = c.ToolName
		out.ToolInput = c.ToolInput
		out.ToolUseID = c.ToolUseID
		out.ToolError = c.ToolError
		out.ToolResult = c.ToolResult
		out.CacheControl = fromLLMCache(c.Cache)
		return out
	}

	// fromLLMToolUse converts an optional llm.ToolUse into its wire form,
	// returning nil when tu is nil.
	//
	// The parameter and result are pointers: the body compares tu with nil and
	// returns either nil or the address of a toolUse, and message.ToolUse, which
	// receives the result, is a *toolUse.
	func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
		if tu == nil {
			return nil
		}
		return &toolUse{
			ID:   tu.ID,
			Name: tu.Name,
		}
	}

	// fromLLMMessage converts an llm.Message into its wire form, translating the
	// role and converting every content block and the optional tool use.
	func fromLLMMessage(msg llm.Message) message {
		var out message
		out.Role = fromLLMRole[msg.Role]
		out.Content = mapped(msg.Content, fromLLMContent)
		out.ToolUse = fromLLMToolUse(msg.ToolUse)
		return out
	}

	// fromLLMToolChoice converts an optional llm.ToolChoice into its wire form,
	// returning nil when tc is nil.
	//
	// The parameter and result are pointers: the body compares tc with nil and
	// returns either nil or the address of a toolChoice, and request.ToolChoice,
	// which receives the result, is a *toolChoice.
	func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
		if tc == nil {
			return nil
		}
		return &toolChoice{
			Type: fromLLMToolChoiceType[tc.Type],
			Name: tc.Name,
		}
	}

	// fromLLMTool copies the name, type, description and input schema of t into
	// a newly allocated wire-format tool. t must be non-nil.
	//
	// The result is a pointer because the body returns the address of a tool and
	// request.Tools, which collects the results, is a []*tool.
	func fromLLMTool(t *llm.Tool) *tool {
		return &tool{
			Name:        t.Name,
			Type:        t.Type,
			Description: t.Description,
			InputSchema: t.InputSchema,
		}
	}

	// fromLLMSystem converts one llm.SystemContent entry into its wire form,
	// translating the cache flag through fromLLMCache.
	func fromLLMSystem(s llm.SystemContent) systemContent {
		var out systemContent
		out.Text = s.Text
		out.Type = s.Type
		out.CacheControl = fromLLMCache(s.Cache)
		return out
	}

	// fromLLMRequest builds the wire request for r. Model and MaxTokens come
	// from the Service, falling back to DefaultModel and DefaultMaxTokens when
	// unset; messages, tool choice, tools and system prompt are converted from r.
	//
	// The receiver is a pointer, matching the (*Service)(nil) interface
	// assertion in this file, and r is taken by pointer.
	func (s *Service) fromLLMRequest(r *llm.Request) *request {
		return &request{
			Model:      cmp.Or(s.Model, DefaultModel),
			Messages:   mapped(r.Messages, fromLLMMessage),
			MaxTokens:  cmp.Or(s.MaxTokens, DefaultMaxTokens),
			ToolChoice: fromLLMToolChoice(r.ToolChoice),
			Tools:      mapped(r.Tools, fromLLMTool),
			System:     mapped(r.System, fromLLMSystem),
		}
	}

	// toLLMUsage copies the token counters and cost of u into an llm.Usage.
	func toLLMUsage(u usage) llm.Usage {
		var out llm.Usage
		out.InputTokens = u.InputTokens
		out.CacheCreationInputTokens = u.CacheCreationInputTokens
		out.CacheReadInputTokens = u.CacheReadInputTokens
		out.OutputTokens = u.OutputTokens
		out.CostUSD = u.CostUSD
		return out
	}

	// toLLMContent converts a wire-format content block into an llm.Content.
	// The type string is translated through toLLMContentType; cache control and
	// timing fields are not carried over.
	func toLLMContent(c content) llm.Content {
		var out llm.Content
		out.ID = c.ID
		out.Type = toLLMContentType[c.Type]
		out.Text = c.Text
		out.Thinking = c.Thinking
		out.Data = c.Data
		out.Signature = c.Signature
		out.ToolName = c.ToolName
		out.ToolInput = c.ToolInput
		out.ToolUseID = c.ToolUseID
		out.ToolError = c.ToolError
		out.ToolResult = c.ToolResult
		return out
	}

	// toLLMResponse converts a decoded API response into a newly allocated
	// llm.Response, translating role, stop reason, content blocks and usage.
	// r must be non-nil.
	//
	// The parameter and result are pointers: the body returns the address of an
	// llm.Response, and Do passes the address of its decoded response.
	func toLLMResponse(r *response) *llm.Response {
		return &llm.Response{
			ID:           r.ID,
			Type:         r.Type,
			Role:         toLLMRole[r.Role],
			Model:        r.Model,
			Content:      mapped(r.Content, toLLMContent),
			StopReason:   toLLMStopReason[r.StopReason],
			StopSequence: r.StopSequence,
			Usage:        toLLMUsage(r.Usage),
		}
	}

	// Do sends a request to Anthropic.
	func (s Service) Do(ctx context.Context, ir llm.Request) (*llm.Response, error) {
	request := s.fromLLMRequest(ir)

	var payload []byte
	var err error
	if dumpText \|\| testing.Testing() {
	payload, err = json.MarshalIndent(request, "", " ")
	} else {
	payload, err = json.Marshal(request)
	payload = append(payload, '\n')
	}
	if err != nil {
	return nil, err
	}

	if false {
	fmt.Printf("claude request payload:\n%s\n", payload)
	}

	backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
	largerMaxTokens := false
	var partialUsage usage

	url := cmp.Or(s.URL, DefaultURL)
	httpc := cmp.Or(s.HTTPC, http.DefaultClient)

	// retry loop
	for attempts := 0; ; attempts++ {
	if dumpText {
	fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
	}
	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
	if err != nil {
	return nil, err
	}

	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-API-Key", s.APIKey)
	req.Header.Set("Anthropic-Version", "2023-06-01")

	var features []string
	if request.TokenEfficientToolUse {
	features = append(features, "token-efficient-tool-use-2025-02-19")
	}
	if largerMaxTokens {
	features = append(features, "output-128k-2025-02-19")
	request.MaxTokens = 128 * 1024
	}
	if len(features) > 0 {
	req.Header.Set("anthropic-beta", strings.Join(features, ","))
	}

	resp, err := httpc.Do(req)
	if err != nil {
	return nil, err
	}
	buf, _ := io.ReadAll(resp.Body)
	resp.Body.Close()

	switch {
	case resp.StatusCode == http.StatusOK:
	if dumpText {
	fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
	}
	var response response
	err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
	if err != nil {
	return nil, err
	}
	if response.StopReason == "max_tokens" && !largerMaxTokens {
	slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
	// Retry with more output tokens.
	largerMaxTokens = true
	response.Usage.CostUSD = response.TotalDollars()
	partialUsage = response.Usage
	continue
	}

	// Calculate and set the cost_usd field
	if largerMaxTokens {
	response.Usage.Add(partialUsage)
	}
	response.Usage.CostUSD = response.TotalDollars()

	return toLLMResponse(&response), nil
	case resp.StatusCode >= 500 && resp.StatusCode < 600:
	// overloaded or unhappy, in one form or another
	sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
	slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "sleep", sleep)
	time.Sleep(sleep)
	case resp.StatusCode == 429:
	// rate limited. wait 1 minute as a starting point, because that's the rate limiting window.
	// and then add some additional time for backoff.
	sleep := time.Minute + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
	slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "sleep", sleep)
	time.Sleep(sleep)
	// case resp.StatusCode == 400:
	// TODO: parse ErrorResponse, make (*ErrorResponse) implement error
	default:
	return nil, fmt.Errorf("API request failed with status %s\n%s", resp.Status, buf)
	}
	}
	}

	// centsPer1MTokens holds the price of each token category for one model,
	// expressed in
	// cents per million tokens
	// (not dollars because i'm twitchy about using floats for money)
	type centsPer1MTokens struct {
	Input uint64
	Output uint64
	CacheRead uint64
	CacheCreation uint64
	}

	// modelCost maps a model identifier to its per-category token prices.
	// TotalDollars panics for any model that is missing from this table.
	// https://www.anthropic.com/pricing#anthropic-api
	var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
	Input: 300, // $3
	Output: 1500, // $15
	CacheRead: 30, // $0.30
	CacheCreation: 375, // $3.75
	},
	Claude35Haiku: {
	Input: 80, // $0.80
	Output: 400, // $4.00
	CacheRead: 8, // $0.08
	CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
	Input: 300, // $3
	Output: 1500, // $15
	CacheRead: 30, // $0.30
	CacheCreation: 375, // $3.75
	},
	}

	// TotalDollars returns the total cost to obtain this response, in dollars.
	// Each token counter in mr.Usage is multiplied by the matching rate from
	// modelCost and the sum is converted from cents-per-million to dollars.
	// It panics if mr.Model has no entry in modelCost.
	func (mr *response) TotalDollars() float64 {
		rates, known := modelCost[mr.Model]
		if !known {
			panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
		}
		// Accumulate in integer "cents x 1e6" so no precision is lost before the
		// final conversion.
		var megaCents uint64
		megaCents += mr.Usage.InputTokens * rates.Input
		megaCents += mr.Usage.OutputTokens * rates.Output
		megaCents += mr.Usage.CacheReadInputTokens * rates.CacheRead
		megaCents += mr.Usage.CacheCreationInputTokens * rates.CacheCreation
		return float64(megaCents) / 1_000_000.0 / 100.0
	}