Blame - llm/conversation/convo.go - sketch

blob: 4740f22ee841a26d511123bd8f7857d8dc9225b9 [file] [log] [blame]

Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	1	package conversation
				2
				3	import (
				4	"context"
				5	"encoding/json"
				6	"errors"
				7	"fmt"
				8	"log/slog"
				9	"maps"
				10	"math/rand/v2"
				11	"slices"
				12	"strings"
				13	"sync"
				14	"time"
				15
				16	"github.com/oklog/ulid/v2"
				17	"github.com/richardlehane/crock32"
				18	"sketch.dev/llm"
				19	"sketch.dev/skribe"
				20	)
				21
				22	type Listener interface {
				23	// TODO: Content is leaking an anthropic API; should we avoid it?
				24	// TODO: Where should we include start/end time and usage?
				25	OnToolCall(ctx context.Context, convo *Convo, toolCallID string, toolName string, toolInput json.RawMessage, content llm.Content)
				26	OnToolResult(ctx context.Context, convo Convo, toolCallID string, toolName string, toolInput json.RawMessage, content llm.Content, result string, err error)
				27	OnRequest(ctx context.Context, convo Convo, requestID string, msg llm.Message)
				28	OnResponse(ctx context.Context, convo Convo, requestID string, msg llm.Response)
				29	}
				30
				31	type NoopListener struct{}
				32
				33	func (n NoopListener) OnToolCall(ctx context.Context, convo Convo, id string, toolName string, toolInput json.RawMessage, content llm.Content) {
				34	}
				35
				36	func (n NoopListener) OnToolResult(ctx context.Context, convo Convo, id string, toolName string, toolInput json.RawMessage, content llm.Content, result *string, err error) {
				37	}
				38
				39	func (n NoopListener) OnResponse(ctx context.Context, convo Convo, id string, msg *llm.Response) {
				40	}
				41	func (n NoopListener) OnRequest(ctx context.Context, convo Convo, id string, msg *llm.Message) {}
				42
				43	var ErrDoNotRespond = errors.New("do not respond")
				44
				45	// A Convo is a managed conversation with Claude.
				46	// It automatically manages the state of the conversation,
				47	// including appending messages send/received,
				48	// calling tools and sending their results,
				49	// tracking usage, etc.
				50	//
				51	// Exported fields must not be altered concurrently with calling any method on Convo.
				52	// Typical usage is to configure a Convo once before using it.
				53	type Convo struct {
				54	// ID is a unique ID for the conversation
				55	ID string
				56	// Ctx is the context for the entire conversation.
				57	Ctx context.Context
				58	// Service is the LLM service to use.
				59	Service llm.Service
				60	// Tools are the tools available during the conversation.
				61	Tools []*llm.Tool
				62	// SystemPrompt is the system prompt for the conversation.
				63	SystemPrompt string
				64	// PromptCaching indicates whether to use Anthropic's prompt caching.
				65	// See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#continuing-a-multi-turn-conversation
				66	// for the documentation. At request send time, we set the cache_control field on the
				67	// last message. We also cache the system prompt.
				68	// Default: true.
				69	PromptCaching bool
				70	// ToolUseOnly indicates whether Claude may only use tools during this conversation.
				71	// TODO: add more fine-grained control over tool use?
				72	ToolUseOnly bool
				73	// Parent is the parent conversation, if any.
				74	// It is non-nil for "subagent" calls.
				75	// It is set automatically when calling SubConvo,
				76	// and usually should not be set manually.
				77	Parent *Convo
				78	// Budget is the budget for this conversation (and all sub-conversations).
				79	// The Conversation DOES NOT automatically enforce the budget.
				80	// It is up to the caller to call OverBudget() as appropriate.
				81	Budget Budget
Josh Bleecher Snyder	4d54493	2025-05-07 13:33:53 +0000	[diff] [blame]	82	// Hidden indicates that the output of this conversation should be hidden in the UI.
				83	// This is useful for subconversations that can generate noisy, uninteresting output.
				84	Hidden bool
Josh Bleecher Snyder	31785ae	2025-05-06 01:50:58 +0000	[diff] [blame]	85	// ExtraData is extra data to make available to all tool calls.
				86	ExtraData map[string]any
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	87
				88	// messages tracks the messages so far in the conversation.
				89	messages []llm.Message
				90
				91	// Listener receives messages being sent.
				92	Listener Listener
				93
				94	muToolUseCancel *sync.Mutex
				95	toolUseCancel map[string]context.CancelCauseFunc
				96
				97	// Protects usage. This is used for subconversations (that share part of CumulativeUsage) as well.
				98	mu *sync.Mutex
				99	// usage tracks usage for this conversation and all sub-conversations.
				100	usage *CumulativeUsage
				101	}
				102
				103	// newConvoID generates a new 8-byte random id.
				104	// The uniqueness/collision requirements here are very low.
				105	// They are not global identifiers,
				106	// just enough to distinguish different convos in a single session.
				107	func newConvoID() string {
				108	u1 := rand.Uint32()
				109	s := crock32.Encode(uint64(u1))
				110	if len(s) < 7 {
				111	s += strings.Repeat("0", 7-len(s))
				112	}
				113	return s[:3] + "-" + s[3:]
				114	}
				115
				116	// New creates a new conversation with Claude with sensible defaults.
				117	// ctx is the context for the entire conversation.
				118	func New(ctx context.Context, srv llm.Service) *Convo {
				119	id := newConvoID()
				120	return &Convo{
				121	Ctx: skribe.ContextWithAttr(ctx, slog.String("convo_id", id)),
				122	Service: srv,
				123	PromptCaching: true,
				124	usage: newUsage(),
				125	Listener: &NoopListener{},
				126	ID: id,
				127	muToolUseCancel: &sync.Mutex{},
				128	toolUseCancel: map[string]context.CancelCauseFunc{},
				129	mu: &sync.Mutex{},
				130	}
				131	}
				132
				133	// SubConvo creates a sub-conversation with the same configuration as the parent conversation.
				134	// (This propagates context for cancellation, HTTP client, API key, etc.)
				135	// The sub-conversation shares no messages with the parent conversation.
				136	// It does not inherit tools from the parent conversation.
				137	func (c Convo) SubConvo() Convo {
				138	id := newConvoID()
				139	return &Convo{
				140	Ctx: skribe.ContextWithAttr(c.Ctx, slog.String("convo_id", id), slog.String("parent_convo_id", c.ID)),
				141	Service: c.Service,
				142	PromptCaching: c.PromptCaching,
				143	Parent: c,
				144	// For convenience, sub-convo usage shares tool uses map with parent,
				145	// all other fields separate, propagated in AddResponse
				146	usage: newUsageWithSharedToolUses(c.usage),
				147	mu: c.mu,
				148	Listener: c.Listener,
				149	ID: id,
				150	// Do not copy Budget. Each budget is independent,
				151	// and OverBudget checks whether any ancestor is over budget.
				152	}
				153	}
				154
				155	func (c Convo) SubConvoWithHistory() Convo {
				156	id := newConvoID()
				157	return &Convo{
				158	Ctx: skribe.ContextWithAttr(c.Ctx, slog.String("convo_id", id), slog.String("parent_convo_id", c.ID)),
				159	Service: c.Service,
				160	PromptCaching: c.PromptCaching,
				161	Parent: c,
				162	// For convenience, sub-convo usage shares tool uses map with parent,
				163	// all other fields separate, propagated in AddResponse
				164	usage: newUsageWithSharedToolUses(c.usage),
				165	mu: c.mu,
				166	Listener: c.Listener,
				167	ID: id,
				168	// Do not copy Budget. Each budget is independent,
				169	// and OverBudget checks whether any ancestor is over budget.
				170	messages: slices.Clone(c.messages),
				171	}
				172	}
				173
				174	// Depth reports how many "sub-conversations" deep this conversation is.
				175	// That it, it walks up parents until it finds a root.
				176	func (c *Convo) Depth() int {
				177	x := c
				178	var depth int
				179	for x.Parent != nil {
				180	x = x.Parent
				181	depth++
				182	}
				183	return depth
				184	}
				185
				186	// SendUserTextMessage sends a text message to the LLM in this conversation.
				187	// otherContents contains additional contents to send with the message, usually tool results.
				188	func (c Convo) SendUserTextMessage(s string, otherContents ...llm.Content) (llm.Response, error) {
				189	contents := slices.Clone(otherContents)
				190	if s != "" {
				191	contents = append(contents, llm.Content{Type: llm.ContentTypeText, Text: s})
				192	}
				193	msg := llm.Message{
				194	Role: llm.MessageRoleUser,
				195	Content: contents,
				196	}
				197	return c.SendMessage(msg)
				198	}
				199
				200	func (c Convo) messageRequest(msg llm.Message) llm.Request {
				201	system := []llm.SystemContent{}
				202	if c.SystemPrompt != "" {
				203	var d llm.SystemContent
				204	d = llm.SystemContent{Type: "text", Text: c.SystemPrompt}
				205	if c.PromptCaching {
				206	d.Cache = true
				207	}
				208	system = []llm.SystemContent{d}
				209	}
				210
				211	// Claude is happy to return an empty response in response to our Done() call,
				212	// and, if so, you'll see something like:
				213	// API request failed with status 400 Bad Request
				214	// {"type":"error","error": {"type":"invalid_request_error",
				215	// "message":"messages.5: all messages must have non-empty content except for the optional final assistant message"}}
				216	// So, we filter out those empty messages.
				217	var nonEmptyMessages []llm.Message
				218	for _, m := range c.messages {
				219	if len(m.Content) > 0 {
				220	nonEmptyMessages = append(nonEmptyMessages, m)
				221	}
				222	}
				223
				224	mr := &llm.Request{
				225	Messages: append(nonEmptyMessages, msg), // not yet committed to keeping msg
				226	System: system,
				227	Tools: c.Tools,
				228	}
				229	if c.ToolUseOnly {
				230	mr.ToolChoice = &llm.ToolChoice{Type: llm.ToolChoiceTypeAny}
				231	}
				232	return mr
				233	}
				234
				235	func (c Convo) findTool(name string) (llm.Tool, error) {
				236	for _, tool := range c.Tools {
				237	if tool.Name == name {
				238	return tool, nil
				239	}
				240	}
				241	return nil, fmt.Errorf("tool %q not found", name)
				242	}
				243
				244	// insertMissingToolResults adds error results for tool uses that were requested
				245	// but not included in the message, which can happen in error paths like "out of budget."
				246	// We only insert these if there were no tool responses at all, since an incorrect
				247	// number of tool results would be a programmer error. Mutates inputs.
				248	func (c Convo) insertMissingToolResults(mr llm.Request, msg *llm.Message) {
				249	if len(mr.Messages) < 2 {
				250	return
				251	}
				252	prev := mr.Messages[len(mr.Messages)-2]
				253	var toolUsePrev int
				254	for _, c := range prev.Content {
				255	if c.Type == llm.ContentTypeToolUse {
				256	toolUsePrev++
				257	}
				258	}
				259	if toolUsePrev == 0 {
				260	return
				261	}
				262	var toolUseCurrent int
				263	for _, c := range msg.Content {
				264	if c.Type == llm.ContentTypeToolResult {
				265	toolUseCurrent++
				266	}
				267	}
				268	if toolUseCurrent != 0 {
				269	return
				270	}
				271	var prefix []llm.Content
				272	for _, part := range prev.Content {
				273	if part.Type != llm.ContentTypeToolUse {
				274	continue
				275	}
				276	content := llm.Content{
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	277	Type: llm.ContentTypeToolResult,
				278	ToolUseID: part.ID,
				279	ToolError: true,
				280	ToolResult: []llm.Content{{
				281	Type: llm.ContentTypeText,
				282	Text: "not executed; retry possible",
				283	}},
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	284	}
				285	prefix = append(prefix, content)
				286	msg.Content = append(prefix, msg.Content...)
				287	mr.Messages[len(mr.Messages)-1].Content = msg.Content
				288	}
				289	slog.DebugContext(c.Ctx, "inserted missing tool results")
				290	}
				291
				292	// SendMessage sends a message to Claude.
				293	// The conversation records (internally) all messages succesfully sent and received.
				294	func (c Convo) SendMessage(msg llm.Message) (llm.Response, error) {
				295	id := ulid.Make().String()
				296	mr := c.messageRequest(msg)
				297	var lastMessage *llm.Message
				298	if c.PromptCaching {
				299	lastMessage = &mr.Messages[len(mr.Messages)-1]
				300	if len(lastMessage.Content) > 0 {
				301	lastMessage.Content[len(lastMessage.Content)-1].Cache = true
				302	}
				303	}
				304	defer func() {
				305	if lastMessage == nil {
				306	return
				307	}
				308	if len(lastMessage.Content) > 0 {
				309	lastMessage.Content[len(lastMessage.Content)-1].Cache = false
				310	}
				311	}()
				312	c.insertMissingToolResults(mr, &msg)
				313	c.Listener.OnRequest(c.Ctx, c, id, &msg)
				314
				315	startTime := time.Now()
				316	resp, err := c.Service.Do(c.Ctx, mr)
				317	if resp != nil {
				318	resp.StartTime = &startTime
				319	endTime := time.Now()
				320	resp.EndTime = &endTime
				321	}
				322
				323	if err != nil {
				324	c.Listener.OnResponse(c.Ctx, c, id, nil)
				325	return nil, err
				326	}
				327	c.messages = append(c.messages, msg, resp.ToMessage())
				328	// Propagate usage to all ancestors (including us).
				329	for x := c; x != nil; x = x.Parent {
				330	x.usage.Add(resp.Usage)
				331	}
				332	c.Listener.OnResponse(c.Ctx, c, id, resp)
				333	return resp, err
				334	}
				335
				336	type toolCallInfoKeyType string
				337
				338	var toolCallInfoKey toolCallInfoKeyType
				339
				340	type ToolCallInfo struct {
				341	ToolUseID string
				342	Convo *Convo
				343	}
				344
				345	func ToolCallInfoFromContext(ctx context.Context) ToolCallInfo {
				346	v := ctx.Value(toolCallInfoKey)
				347	i, _ := v.(ToolCallInfo)
				348	return i
				349	}
				350
				351	func (c Convo) ToolResultCancelContents(resp llm.Response) ([]llm.Content, error) {
				352	if resp.StopReason != llm.StopReasonToolUse {
				353	return nil, nil
				354	}
				355	var toolResults []llm.Content
				356
				357	for _, part := range resp.Content {
				358	if part.Type != llm.ContentTypeToolUse {
				359	continue
				360	}
				361	c.incrementToolUse(part.ToolName)
				362
				363	content := llm.Content{
				364	Type: llm.ContentTypeToolResult,
				365	ToolUseID: part.ID,
				366	}
				367
				368	content.ToolError = true
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	369	content.ToolResult = []llm.Content{{
				370	Type: llm.ContentTypeText,
				371	Text: "user canceled this too_use",
				372	}}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	373	toolResults = append(toolResults, content)
				374	}
				375	return toolResults, nil
				376	}
				377
				378	// GetID returns the conversation ID
				379	func (c *Convo) GetID() string {
				380	return c.ID
				381	}
				382
				383	func (c *Convo) CancelToolUse(toolUseID string, err error) error {
				384	c.muToolUseCancel.Lock()
				385	defer c.muToolUseCancel.Unlock()
				386	cancel, ok := c.toolUseCancel[toolUseID]
				387	if !ok {
				388	return fmt.Errorf("cannot cancel %s: no cancel function registered for this tool_use_id. All I have is %+v", toolUseID, c.toolUseCancel)
				389	}
				390	delete(c.toolUseCancel, toolUseID)
				391	cancel(err)
				392	return nil
				393	}
				394
				395	func (c *Convo) newToolUseContext(ctx context.Context, toolUseID string) (context.Context, context.CancelFunc) {
				396	c.muToolUseCancel.Lock()
				397	defer c.muToolUseCancel.Unlock()
				398	ctx, cancel := context.WithCancelCause(ctx)
				399	c.toolUseCancel[toolUseID] = cancel
				400	return ctx, func() { c.CancelToolUse(toolUseID, nil) }
				401	}
				402
				403	// ToolResultContents runs all tool uses requested by the response and returns their results.
				404	// Cancelling ctx will cancel any running tool calls.
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	405	// The boolean return value indicates whether any of the executed tools should end the turn.
				406	func (c Convo) ToolResultContents(ctx context.Context, resp llm.Response) ([]llm.Content, bool, error) {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	407	if resp.StopReason != llm.StopReasonToolUse {
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	408	return nil, false, nil
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	409	}
				410	// Extract all tool calls from the response, call the tools, and gather the results.
				411	var wg sync.WaitGroup
				412	toolResultC := make(chan llm.Content, len(resp.Content))
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	413
				414	endsTurn := false
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	415	for _, part := range resp.Content {
				416	if part.Type != llm.ContentTypeToolUse {
				417	continue
				418	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	419	tool, err := c.findTool(part.ToolName)
				420	if err == nil && tool.EndsTurn {
				421	endsTurn = true
				422	}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	423	c.incrementToolUse(part.ToolName)
				424	startTime := time.Now()
				425
				426	c.Listener.OnToolCall(ctx, c, part.ID, part.ToolName, part.ToolInput, llm.Content{
				427	Type: llm.ContentTypeToolUse,
				428	ToolUseID: part.ID,
				429	ToolUseStartTime: &startTime,
				430	})
				431
				432	wg.Add(1)
				433	go func() {
				434	defer wg.Done()
				435
				436	content := llm.Content{
				437	Type: llm.ContentTypeToolResult,
				438	ToolUseID: part.ID,
				439	ToolUseStartTime: &startTime,
				440	}
				441	sendErr := func(err error) {
				442	// Record end time
				443	endTime := time.Now()
				444	content.ToolUseEndTime = &endTime
				445
				446	content.ToolError = true
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	447	content.ToolResult = []llm.Content{{
				448	Type: llm.ContentTypeText,
				449	Text: err.Error(),
				450	}}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	451	c.Listener.OnToolResult(ctx, c, part.ID, part.ToolName, part.ToolInput, content, nil, err)
				452	toolResultC <- content
				453	}
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	454	sendRes := func(toolResult []llm.Content) {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	455	// Record end time
				456	endTime := time.Now()
				457	content.ToolUseEndTime = &endTime
				458
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	459	content.ToolResult = toolResult
				460	var firstText string
				461	if len(toolResult) > 0 {
				462	firstText = toolResult[0].Text
				463	}
				464	c.Listener.OnToolResult(ctx, c, part.ID, part.ToolName, part.ToolInput, content, &firstText, nil)
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	465	toolResultC <- content
				466	}
				467
				468	tool, err := c.findTool(part.ToolName)
				469	if err != nil {
				470	sendErr(err)
				471	return
				472	}
				473	// Create a new context for just this tool_use call, and register its
				474	// cancel function so that it can be canceled individually.
				475	toolUseCtx, cancel := c.newToolUseContext(ctx, part.ID)
				476	defer cancel()
				477	// TODO: move this into newToolUseContext?
				478	toolUseCtx = context.WithValue(toolUseCtx, toolCallInfoKey, ToolCallInfo{ToolUseID: part.ID, Convo: c})
				479	toolResult, err := tool.Run(toolUseCtx, part.ToolInput)
				480	if errors.Is(err, ErrDoNotRespond) {
				481	return
				482	}
				483	if toolUseCtx.Err() != nil {
				484	sendErr(context.Cause(toolUseCtx))
				485	return
				486	}
				487
				488	if err != nil {
				489	sendErr(err)
				490	return
				491	}
				492	sendRes(toolResult)
				493	}()
				494	}
				495	wg.Wait()
				496	close(toolResultC)
				497	var toolResults []llm.Content
				498	for toolResult := range toolResultC {
				499	toolResults = append(toolResults, toolResult)
				500	}
				501	if ctx.Err() != nil {
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	502	return nil, false, ctx.Err()
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	503	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	504	return toolResults, endsTurn, nil
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	505	}
				506
				507	func (c *Convo) incrementToolUse(name string) {
				508	c.mu.Lock()
				509	defer c.mu.Unlock()
				510
				511	c.usage.ToolUses[name]++
				512	}
				513
				514	// CumulativeUsage represents cumulative usage across a Convo, including all sub-conversations.
				515	type CumulativeUsage struct {
				516	StartTime time.Time `json:"start_time"`
				517	Responses uint64 `json:"messages"` // count of responses
				518	InputTokens uint64 `json:"input_tokens"`
				519	OutputTokens uint64 `json:"output_tokens"`
				520	CacheReadInputTokens uint64 `json:"cache_read_input_tokens"`
				521	CacheCreationInputTokens uint64 `json:"cache_creation_input_tokens"`
				522	TotalCostUSD float64 `json:"total_cost_usd"`
				523	ToolUses map[string]int `json:"tool_uses"` // tool name -> number of uses
				524	}
				525
				526	func newUsage() *CumulativeUsage {
				527	return &CumulativeUsage{ToolUses: make(map[string]int), StartTime: time.Now()}
				528	}
				529
				530	func newUsageWithSharedToolUses(parent CumulativeUsage) CumulativeUsage {
				531	return &CumulativeUsage{ToolUses: parent.ToolUses, StartTime: time.Now()}
				532	}
				533
				534	func (u *CumulativeUsage) Clone() CumulativeUsage {
				535	v := *u
				536	v.ToolUses = maps.Clone(u.ToolUses)
				537	return v
				538	}
				539
				540	func (c *Convo) CumulativeUsage() CumulativeUsage {
				541	if c == nil {
				542	return CumulativeUsage{}
				543	}
				544	c.mu.Lock()
				545	defer c.mu.Unlock()
				546	return c.usage.Clone()
				547	}
				548
				549	func (u *CumulativeUsage) WallTime() time.Duration {
				550	return time.Since(u.StartTime)
				551	}
				552
				553	func (u *CumulativeUsage) DollarsPerHour() float64 {
				554	hours := u.WallTime().Hours()
				555	// Prevent division by very small numbers that could cause issues
				556	if hours < 1e-6 {
				557	return 0
				558	}
				559	return u.TotalCostUSD / hours
				560	}
				561
				562	func (u *CumulativeUsage) Add(usage llm.Usage) {
				563	u.Responses++
				564	u.InputTokens += usage.InputTokens
				565	u.OutputTokens += usage.OutputTokens
				566	u.CacheReadInputTokens += usage.CacheReadInputTokens
				567	u.CacheCreationInputTokens += usage.CacheCreationInputTokens
				568	u.TotalCostUSD += usage.CostUSD
				569	}
				570
				571	// TotalInputTokens returns the grand total cumulative input tokens in u.
				572	func (u *CumulativeUsage) TotalInputTokens() uint64 {
				573	return u.InputTokens + u.CacheReadInputTokens + u.CacheCreationInputTokens
				574	}
				575
				576	// Attr returns the cumulative usage as a slog.Attr with key "usage".
				577	func (u CumulativeUsage) Attr() slog.Attr {
				578	elapsed := time.Since(u.StartTime)
				579	return slog.Group("usage",
				580	slog.Duration("wall_time", elapsed),
				581	slog.Uint64("responses", u.Responses),
				582	slog.Uint64("input_tokens", u.InputTokens),
				583	slog.Uint64("output_tokens", u.OutputTokens),
				584	slog.Uint64("cache_read_input_tokens", u.CacheReadInputTokens),
				585	slog.Uint64("cache_creation_input_tokens", u.CacheCreationInputTokens),
				586	slog.Float64("total_cost_usd", u.TotalCostUSD),
				587	slog.Float64("dollars_per_hour", u.TotalCostUSD/elapsed.Hours()),
				588	slog.Any("tool_uses", maps.Clone(u.ToolUses)),
				589	)
				590	}
				591
				592	// A Budget represents the maximum amount of resources that may be spent on a conversation.
				593	// Note that the default (zero) budget is unlimited.
				594	type Budget struct {
				595	MaxResponses uint64 // if > 0, max number of iterations (=responses)
				596	MaxDollars float64 // if > 0, max dollars that may be spent
				597	MaxWallTime time.Duration // if > 0, max wall time that may be spent
				598	}
				599
				600	// OverBudget returns an error if the convo (or any of its parents) has exceeded its budget.
				601	// TODO: document parent vs sub budgets, multiple errors, etc, once we know the desired behavior.
				602	func (c *Convo) OverBudget() error {
				603	for x := c; x != nil; x = x.Parent {
				604	if err := x.overBudget(); err != nil {
				605	return err
				606	}
				607	}
				608	return nil
				609	}
				610
				611	// ResetBudget sets the budget to the passed in budget and
				612	// adjusts it by what's been used so far.
				613	func (c *Convo) ResetBudget(budget Budget) {
				614	c.Budget = budget
				615	if c.Budget.MaxDollars > 0 {
				616	c.Budget.MaxDollars += c.CumulativeUsage().TotalCostUSD
				617	}
				618	if c.Budget.MaxResponses > 0 {
				619	c.Budget.MaxResponses += c.CumulativeUsage().Responses
				620	}
				621	if c.Budget.MaxWallTime > 0 {
				622	c.Budget.MaxWallTime += c.usage.WallTime()
				623	}
				624	}
				625
				626	func (c *Convo) overBudget() error {
				627	usage := c.CumulativeUsage()
				628	// TODO: stop before we exceed the budget instead of after?
				629	// Top priority is money, then time, then response count.
				630	var err error
				631	cont := "Continuing to chat will reset the budget."
				632	if c.Budget.MaxDollars > 0 && usage.TotalCostUSD >= c.Budget.MaxDollars {
				633	err = errors.Join(err, fmt.Errorf("$%.2f spent, budget is $%.2f. %s", usage.TotalCostUSD, c.Budget.MaxDollars, cont))
				634	}
				635	if c.Budget.MaxWallTime > 0 && usage.WallTime() >= c.Budget.MaxWallTime {
				636	err = errors.Join(err, fmt.Errorf("%v elapsed, budget is %v. %s", usage.WallTime().Truncate(time.Second), c.Budget.MaxWallTime.Truncate(time.Second), cont))
				637	}
				638	if c.Budget.MaxResponses > 0 && usage.Responses >= c.Budget.MaxResponses {
				639	err = errors.Join(err, fmt.Errorf("%d responses received, budget is %d. %s", usage.Responses, c.Budget.MaxResponses, cont))
				640	}
				641	return err
				642	}