blob: fdf2fde7089c1110a33148efcf883175207093fa [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package ant
2
3import (
4 "bytes"
5 "cmp"
6 "context"
7 "encoding/json"
8 "fmt"
9 "io"
10 "log/slog"
11 "math/rand/v2"
12 "net/http"
13 "strings"
14 "testing"
15 "time"
16
17 "sketch.dev/llm"
18)
19
// Defaults used by Service when the corresponding field is left unset.
const (
	DefaultModel = Claude37Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	// DefaultURL is the Anthropic Messages API endpoint.
	DefaultURL = "https://api.anthropic.com/v1/messages"
)
27
// Claude model identifiers, as accepted by the Anthropic API's "model" field.
const (
	Claude35Sonnet = "claude-3-5-sonnet-20241022"
	Claude35Haiku  = "claude-3-5-haiku-20241022"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
)
33
// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	URL       string       // defaults to DefaultURL if empty
	APIKey    string       // must be non-empty
	Model     string       // defaults to DefaultModel if empty
	MaxTokens int          // defaults to DefaultMaxTokens if zero
}
43
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)
45
// content is a single content block within a message. One struct covers
// every block type used by the Messages API (text, image, thinking,
// redacted_thinking, tool_use, tool_result); which fields are meaningful
// depends on Type.
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string appears in tool results often, so we have
	// to distinguish between empty string and no string.
	// Underlying error looks like one of:
	// "messages.46.content.0.tool_result.content.0.text.text: Field required""
	// "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a super great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat acceptable but hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; message looks like:
	// {
	//  "role": "user",
	//  "content": [
	//   {
	//    "type": "tool_result",
	//    "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//    "content": [
	//     {"type": "text", "text": "15 degrees"},
	//     {
	//      "type": "image",
	//      "source": {
	//       "type": "base64",
	//       "media_type": "image/jpeg",
	//       "data": "/9j/4AAQSkZJRg...",
	//      }
	//     }
	//    ]
	//   }
	//  ]
	//}
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	// CacheControl, when non-nil, marks this block as a prompt-cache
	// breakpoint (see fromLLMCache).
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
105
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"` // "user" or "assistant" (see fromLLMRole)
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // use to control whether/which tool to use
}
112
// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}
118
// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type        string          `json:"type,omitempty"`
	Description string          `json:"description,omitempty"`
	InputSchema json.RawMessage `json:"input_schema,omitempty"` // JSON Schema for the tool's input
}
128
// usage represents the billing and rate-limit usage, mirroring the
// "usage" object in Anthropic's API responses. CostUSD is computed
// locally (see TotalDollars), not returned by the API.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

// Add accumulates other's token counters and cost into u.
func (u *usage) Add(other usage) {
	*u = usage{
		InputTokens:              u.InputTokens + other.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens + other.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens + other.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens + other.OutputTokens,
		CostUSD:                  u.CostUSD + other.CostUSD,
	}
}
145
// errorResponse is the error payload returned by the API on failure:
// a machine-readable type plus a human-readable message.
type errorResponse struct {
	Type    string `json:"type"`
	Message string `json:"message"`
}
150
// response represents the response from the message API.
type response struct {
	ID      string    `json:"id"`
	Type    string    `json:"type"`
	Role    string    `json:"role"`
	Model   string    `json:"model"`
	Content []content `json:"content"`
	// StopReason values handled here: "stop_sequence", "max_tokens",
	// "end_turn", "tool_use" (see toLLMStopReason).
	StopReason   string  `json:"stop_reason"`
	StopSequence *string `json:"stop_sequence,omitempty"`
	Usage        usage   `json:"usage"`
}
162
// toolChoice controls whether/which tool Claude may use. Type takes the
// values in fromLLMToolChoiceType ("auto", "any", "none", "tool").
type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"` // name of the requested tool, when applicable
}
167
// systemContent is one block of the system prompt.
// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"` // see fromLLMCache
}
174
// request represents the request payload for creating a message.
type request struct {
	Model         string          `json:"model"`
	Messages      []message       `json:"messages"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	MaxTokens     int             `json:"max_tokens"`
	Tools         []*tool         `json:"tools,omitempty"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`

	// TokenEfficientToolUse opts in to the token-efficient tool use beta
	// header (never serialized into the JSON body).
	TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
191
// dumpText is a debugging toggle to see raw communications with Claude.
const dumpText = false
193
// mapped returns a new slice containing f applied to each element of s,
// preserving order. The result is never nil, even for empty input.
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	result := make([]T, 0, len(s))
	for _, elem := range s {
		result = append(result, f(elem))
	}
	return result
}
201
// inverted returns the inverse of m, mapping each value back to its key.
// It panics if two keys share a value, since the inverse would then be
// ambiguous; callers use it only on bijective translation tables.
func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	out := make(map[V]K, len(m))
	for key, val := range m {
		if _, dup := out[val]; dup {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", val))
		}
		out[val] = key
	}
	return out
}
212
// Translation tables between llm package enums and Anthropic wire
// strings. The to* tables are mechanically inverted from the from*
// tables where the mapping is bijective.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
	}
)
243
// fromLLMCache returns the cache_control JSON marking a block as an
// ephemeral prompt-cache breakpoint, or nil when caching is not requested.
func fromLLMCache(c bool) json.RawMessage {
	if c {
		return json.RawMessage(`{"type":"ephemeral"}`)
	}
	return nil
}
250
251func fromLLMContent(c llm.Content) content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700252 var toolResult []content
253 if len(c.ToolResult) > 0 {
254 toolResult = make([]content, len(c.ToolResult))
255 for i, tr := range c.ToolResult {
256 // For image content inside a tool_result, we need to map it to "image" type
257 if tr.MediaType != "" && tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
258 // Format as an image for Claude
259 toolResult[i] = content{
260 Type: "image",
261 Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
262 tr.MediaType, tr.Data)),
263 }
264 } else {
265 toolResult[i] = fromLLMContent(tr)
266 }
267 }
268 }
269
270 d := content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700271 ID: c.ID,
272 Type: fromLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700273 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700274 Thinking: c.Thinking,
275 Data: c.Data,
276 Signature: c.Signature,
277 ToolName: c.ToolName,
278 ToolInput: c.ToolInput,
279 ToolUseID: c.ToolUseID,
280 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700281 ToolResult: toolResult,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700282 CacheControl: fromLLMCache(c.Cache),
283 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700284 // Anthropic API complains if Text is specified when it shouldn't be
285 // or not specified when it's the empty string.
286 if c.Type != llm.ContentTypeToolResult && c.Type != llm.ContentTypeToolUse {
287 d.Text = &c.Text
288 }
289 return d
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700290}
291
292func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
293 if tu == nil {
294 return nil
295 }
296 return &toolUse{
297 ID: tu.ID,
298 Name: tu.Name,
299 }
300}
301
302func fromLLMMessage(msg llm.Message) message {
303 return message{
304 Role: fromLLMRole[msg.Role],
305 Content: mapped(msg.Content, fromLLMContent),
306 ToolUse: fromLLMToolUse(msg.ToolUse),
307 }
308}
309
310func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
311 if tc == nil {
312 return nil
313 }
314 return &toolChoice{
315 Type: fromLLMToolChoiceType[tc.Type],
316 Name: tc.Name,
317 }
318}
319
320func fromLLMTool(t *llm.Tool) *tool {
321 return &tool{
322 Name: t.Name,
323 Type: t.Type,
324 Description: t.Description,
325 InputSchema: t.InputSchema,
326 }
327}
328
329func fromLLMSystem(s llm.SystemContent) systemContent {
330 return systemContent{
331 Text: s.Text,
332 Type: s.Type,
333 CacheControl: fromLLMCache(s.Cache),
334 }
335}
336
337func (s *Service) fromLLMRequest(r *llm.Request) *request {
338 return &request{
339 Model: cmp.Or(s.Model, DefaultModel),
340 Messages: mapped(r.Messages, fromLLMMessage),
341 MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
342 ToolChoice: fromLLMToolChoice(r.ToolChoice),
343 Tools: mapped(r.Tools, fromLLMTool),
344 System: mapped(r.System, fromLLMSystem),
345 }
346}
347
348func toLLMUsage(u usage) llm.Usage {
349 return llm.Usage{
350 InputTokens: u.InputTokens,
351 CacheCreationInputTokens: u.CacheCreationInputTokens,
352 CacheReadInputTokens: u.CacheReadInputTokens,
353 OutputTokens: u.OutputTokens,
354 CostUSD: u.CostUSD,
355 }
356}
357
358func toLLMContent(c content) llm.Content {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700359 // Convert toolResult from []content to []llm.Content
360 var toolResultContents []llm.Content
361 if len(c.ToolResult) > 0 {
362 toolResultContents = make([]llm.Content, len(c.ToolResult))
363 for i, tr := range c.ToolResult {
364 toolResultContents[i] = toLLMContent(tr)
365 }
366 }
367
368 ret := llm.Content{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700369 ID: c.ID,
370 Type: toLLMContentType[c.Type],
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700371 MediaType: c.MediaType,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700372 Thinking: c.Thinking,
373 Data: c.Data,
374 Signature: c.Signature,
375 ToolName: c.ToolName,
376 ToolInput: c.ToolInput,
377 ToolUseID: c.ToolUseID,
378 ToolError: c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700379 ToolResult: toolResultContents,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700380 }
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700381 if c.Text != nil {
382 ret.Text = *c.Text
383 }
384 return ret
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700385}
386
387func toLLMResponse(r *response) *llm.Response {
388 return &llm.Response{
389 ID: r.ID,
390 Type: r.Type,
391 Role: toLLMRole[r.Role],
392 Model: r.Model,
393 Content: mapped(r.Content, toLLMContent),
394 StopReason: toLLMStopReason[r.StopReason],
395 StopSequence: r.StopSequence,
396 Usage: toLLMUsage(r.Usage),
397 }
398}
399
400// Do sends a request to Anthropic.
401func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
402 request := s.fromLLMRequest(ir)
403
404 var payload []byte
405 var err error
406 if dumpText || testing.Testing() {
407 payload, err = json.MarshalIndent(request, "", " ")
408 } else {
409 payload, err = json.Marshal(request)
410 payload = append(payload, '\n')
411 }
412 if err != nil {
413 return nil, err
414 }
415
416 if false {
417 fmt.Printf("claude request payload:\n%s\n", payload)
418 }
419
420 backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
421 largerMaxTokens := false
422 var partialUsage usage
423
424 url := cmp.Or(s.URL, DefaultURL)
425 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
426
427 // retry loop
428 for attempts := 0; ; attempts++ {
429 if dumpText {
430 fmt.Printf("RAW REQUEST:\n%s\n\n", payload)
431 }
432 req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
433 if err != nil {
434 return nil, err
435 }
436
437 req.Header.Set("Content-Type", "application/json")
438 req.Header.Set("X-API-Key", s.APIKey)
439 req.Header.Set("Anthropic-Version", "2023-06-01")
440
441 var features []string
442 if request.TokenEfficientToolUse {
443 features = append(features, "token-efficient-tool-use-2025-02-19")
444 }
445 if largerMaxTokens {
446 features = append(features, "output-128k-2025-02-19")
447 request.MaxTokens = 128 * 1024
448 }
449 if len(features) > 0 {
450 req.Header.Set("anthropic-beta", strings.Join(features, ","))
451 }
452
453 resp, err := httpc.Do(req)
454 if err != nil {
455 return nil, err
456 }
457 buf, _ := io.ReadAll(resp.Body)
458 resp.Body.Close()
459
460 switch {
461 case resp.StatusCode == http.StatusOK:
462 if dumpText {
463 fmt.Printf("RAW RESPONSE:\n%s\n\n", buf)
464 }
465 var response response
466 err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
467 if err != nil {
468 return nil, err
469 }
470 if response.StopReason == "max_tokens" && !largerMaxTokens {
Josh Bleecher Snyder29fea842025-05-06 01:51:09 +0000471 slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700472 // Retry with more output tokens.
473 largerMaxTokens = true
474 response.Usage.CostUSD = response.TotalDollars()
475 partialUsage = response.Usage
476 continue
477 }
478
479 // Calculate and set the cost_usd field
480 if largerMaxTokens {
481 response.Usage.Add(partialUsage)
482 }
483 response.Usage.CostUSD = response.TotalDollars()
484
485 return toLLMResponse(&response), nil
486 case resp.StatusCode >= 500 && resp.StatusCode < 600:
487 // overloaded or unhappy, in one form or another
488 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
489 slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "sleep", sleep)
490 time.Sleep(sleep)
491 case resp.StatusCode == 429:
492 // rate limited. wait 1 minute as a starting point, because that's the rate limiting window.
493 // and then add some additional time for backoff.
494 sleep := time.Minute + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
495 slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "sleep", sleep)
496 time.Sleep(sleep)
497 // case resp.StatusCode == 400:
498 // TODO: parse ErrorResponse, make (*ErrorResponse) implement error
499 default:
500 return nil, fmt.Errorf("API request failed with status %s\n%s", resp.Status, buf)
501 }
502 }
503}
504
// centsPer1MTokens holds one model's pricing, in cents per million tokens
// (not dollars because i'm twitchy about using floats for money).
type centsPer1MTokens struct {
	Input         uint64
	Output        uint64
	CacheRead     uint64
	CacheCreation uint64
}
513
// modelCost maps model identifiers to their API pricing.
// https://www.anthropic.com/pricing#anthropic-api
var modelCost = map[string]centsPer1MTokens{
	Claude37Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
	Claude35Haiku: {
		Input:         80,  // $0.80
		Output:        400, // $4.00
		CacheRead:     8,   // $0.08
		CacheCreation: 100, // $1.00
	},
	Claude35Sonnet: {
		Input:         300,  // $3
		Output:        1500, // $15
		CacheRead:     30,   // $0.30
		CacheCreation: 375,  // $3.75
	},
}
535
536// TotalDollars returns the total cost to obtain this response, in dollars.
537func (mr *response) TotalDollars() float64 {
538 cpm, ok := modelCost[mr.Model]
539 if !ok {
540 panic(fmt.Sprintf("no pricing info for model: %s", mr.Model))
541 }
542 use := mr.Usage
543 megaCents := use.InputTokens*cpm.Input +
544 use.OutputTokens*cpm.Output +
545 use.CacheReadInputTokens*cpm.CacheRead +
546 use.CacheCreationInputTokens*cpm.CacheCreation
547 cents := float64(megaCents) / 1_000_000.0
548 return cents / 100.0
549}