all: support openai-compatible models

The support is rather minimal at this point:
Only hard-coded models, only -unsafe, only -skabandaddr="".

The "shared" LLM package is strongly Claude-flavored.

We can fix all of this and more over time, if we are inspired to.
(Maybe we'll switch to https://github.com/maruel/genai?)

The goal for now is to get the rough structure in place.
I've rebased and rebuilt this more times than I care to remember.
diff --git a/llm/conversation/convo.go b/llm/conversation/convo.go
new file mode 100644
index 0000000..5a12256
--- /dev/null
+++ b/llm/conversation/convo.go
@@ -0,0 +1,617 @@
+package conversation
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log/slog"
+	"maps"
+	"math/rand/v2"
+	"slices"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/oklog/ulid/v2"
+	"github.com/richardlehane/crock32"
+	"sketch.dev/llm"
+	"sketch.dev/skribe"
+)
+
+// Listener observes the lifecycle of a conversation: outgoing requests,
+// incoming responses, and tool calls/results. OnToolResult may be invoked
+// from the goroutines that run tools (see ToolResultContents), so
+// implementations should be safe for concurrent use.
+type Listener interface {
+	// TODO: Content is leaking an anthropic API; should we avoid it?
+	// TODO: Where should we include start/end time and usage?
+	OnToolCall(ctx context.Context, convo *Convo, toolCallID string, toolName string, toolInput json.RawMessage, content llm.Content)
+	OnToolResult(ctx context.Context, convo *Convo, toolCallID string, toolName string, toolInput json.RawMessage, content llm.Content, result *string, err error)
+	OnRequest(ctx context.Context, convo *Convo, requestID string, msg *llm.Message)
+	OnResponse(ctx context.Context, convo *Convo, requestID string, msg *llm.Response)
+}
+
+// NoopListener is a Listener whose callbacks all do nothing.
+// It is the default Listener installed by New.
+type NoopListener struct{}
+
+func (n *NoopListener) OnToolCall(ctx context.Context, convo *Convo, id string, toolName string, toolInput json.RawMessage, content llm.Content) {
+}
+
+func (n *NoopListener) OnToolResult(ctx context.Context, convo *Convo, id string, toolName string, toolInput json.RawMessage, content llm.Content, result *string, err error) {
+}
+
+func (n *NoopListener) OnResponse(ctx context.Context, convo *Convo, id string, msg *llm.Response) {
+}
+func (n *NoopListener) OnRequest(ctx context.Context, convo *Convo, id string, msg *llm.Message) {}
+
+var ErrDoNotRespond = errors.New("do not respond")
+
+// A Convo is a managed conversation with Claude.
+// It automatically manages the state of the conversation,
+// including appending messages sent/received,
+// calling tools and sending their results,
+// tracking usage, etc.
+//
+// Exported fields must not be altered concurrently with calling any method on Convo.
+// Typical usage is to configure a Convo once before using it.
+type Convo struct {
+	// ID is a unique ID for the conversation
+	ID string
+	// Ctx is the context for the entire conversation.
+	Ctx context.Context
+	// Service is the LLM service to use.
+	Service llm.Service
+	// Tools are the tools available during the conversation.
+	Tools []*llm.Tool
+	// SystemPrompt is the system prompt for the conversation.
+	SystemPrompt string
+	// PromptCaching indicates whether to use Anthropic's prompt caching.
+	// See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#continuing-a-multi-turn-conversation
+	// for the documentation. At request send time, we set the cache_control field on the
+	// last message. We also cache the system prompt.
+	// Default: true.
+	PromptCaching bool
+	// ToolUseOnly indicates whether Claude may only use tools during this conversation.
+	// TODO: add more fine-grained control over tool use?
+	ToolUseOnly bool
+	// Parent is the parent conversation, if any.
+	// It is non-nil for "subagent" calls.
+	// It is set automatically when calling SubConvo,
+	// and usually should not be set manually.
+	Parent *Convo
+	// Budget is the budget for this conversation (and all sub-conversations).
+	// The Conversation DOES NOT automatically enforce the budget.
+	// It is up to the caller to call OverBudget() as appropriate.
+	Budget Budget
+
+	// messages tracks the messages so far in the conversation.
+	messages []llm.Message
+
+	// Listener receives messages being sent.
+	Listener Listener
+
+	// muToolUseCancel guards toolUseCancel.
+	muToolUseCancel *sync.Mutex
+	// toolUseCancel maps in-flight tool_use IDs to their cancel functions;
+	// entries are registered in newToolUseContext and removed in CancelToolUse.
+	toolUseCancel   map[string]context.CancelCauseFunc
+
+	// Protects usage. This is used for subconversations (that share part of CumulativeUsage) as well.
+	mu *sync.Mutex
+	// usage tracks usage for this conversation and all sub-conversations.
+	usage *CumulativeUsage
+}
+
+// newConvoID generates a new random id of the form "xxx-xxxx":
+// 4 random bytes, Crockford-base32 encoded and zero-padded to 7 digits.
+// The uniqueness/collision requirements here are very low.
+// They are not global identifiers,
+// just enough to distinguish different convos in a single session.
+func newConvoID() string {
+	u1 := rand.Uint32()
+	s := crock32.Encode(uint64(u1))
+	// Encode may yield fewer than 7 digits; pad (at the end — position is
+	// irrelevant for a random id) so the slicing below is always in range.
+	if len(s) < 7 {
+		s += strings.Repeat("0", 7-len(s))
+	}
+	return s[:3] + "-" + s[3:]
+}
+
+// New creates a new conversation with Claude with sensible defaults:
+// prompt caching enabled, a no-op listener, and fresh usage tracking.
+// ctx is the context for the entire conversation.
+func New(ctx context.Context, srv llm.Service) *Convo {
+	convo := &Convo{
+		ID:              newConvoID(),
+		Service:         srv,
+		PromptCaching:   true,
+		Listener:        &NoopListener{},
+		usage:           newUsage(),
+		mu:              &sync.Mutex{},
+		muToolUseCancel: &sync.Mutex{},
+		toolUseCancel:   make(map[string]context.CancelCauseFunc),
+	}
+	// Tag all logging done under this conversation with its id.
+	convo.Ctx = skribe.ContextWithAttr(ctx, slog.String("convo_id", convo.ID))
+	return convo
+}
+
+// SubConvo creates a sub-conversation with the same configuration as the parent conversation.
+// (This propagates context for cancellation, HTTP client, API key, etc.)
+// The sub-conversation shares no messages with the parent conversation.
+// It does not inherit tools from the parent conversation.
+func (c *Convo) SubConvo() *Convo {
+	id := newConvoID()
+	return &Convo{
+		Ctx:           skribe.ContextWithAttr(c.Ctx, slog.String("convo_id", id), slog.String("parent_convo_id", c.ID)),
+		Service:       c.Service,
+		PromptCaching: c.PromptCaching,
+		Parent:        c,
+		// For convenience, sub-convo usage shares tool uses map with parent,
+		// all other fields separate, propagated in AddResponse
+		usage:    newUsageWithSharedToolUses(c.usage),
+		mu:       c.mu,
+		Listener: c.Listener,
+		ID:       id,
+		// Initialize tool-use cancellation state. Without these, running
+		// tools on a sub-convo (ToolResultContents -> newToolUseContext)
+		// would lock a nil *sync.Mutex and panic.
+		muToolUseCancel: &sync.Mutex{},
+		toolUseCancel:   map[string]context.CancelCauseFunc{},
+		// Do not copy Budget. Each budget is independent,
+		// and OverBudget checks whether any ancestor is over budget.
+	}
+}
+
+// SubConvoWithHistory is SubConvo, but the sub-conversation starts with a
+// copy of the parent's message history. The copy is shallow but independent:
+// appending to either conversation does not affect the other.
+func (c *Convo) SubConvoWithHistory() *Convo {
+	// Delegate to SubConvo so the (long) list of propagated fields lives
+	// in exactly one place; the two constructors previously duplicated it.
+	sub := c.SubConvo()
+	sub.messages = slices.Clone(c.messages)
+	return sub
+}
+
+// Depth reports how many "sub-conversations" deep this conversation is.
+// That it, it walks up parents until it finds a root.
+func (c *Convo) Depth() int {
+	x := c
+	var depth int
+	for x.Parent != nil {
+		x = x.Parent
+		depth++
+	}
+	return depth
+}
+
+// SendUserTextMessage sends a text message to the LLM in this conversation.
+// If s is empty, no text content is added.
+// otherContents contains additional contents to send with the message, usually tool results.
+func (c *Convo) SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error) {
+	// Clone so we never mutate the caller's variadic backing array.
+	parts := slices.Clone(otherContents)
+	if s != "" {
+		parts = append(parts, llm.Content{Type: llm.ContentTypeText, Text: s})
+	}
+	return c.SendMessage(llm.Message{
+		Role:    llm.MessageRoleUser,
+		Content: parts,
+	})
+}
+
+// messageRequest assembles the llm.Request for sending msg: the (optionally
+// cached) system prompt, the prior non-empty message history, msg itself,
+// and the conversation's tools.
+func (c *Convo) messageRequest(msg llm.Message) *llm.Request {
+	system := []llm.SystemContent{}
+	if c.SystemPrompt != "" {
+		// Cache the system prompt whenever prompt caching is enabled.
+		system = []llm.SystemContent{{Type: "text", Text: c.SystemPrompt, Cache: c.PromptCaching}}
+	}
+
+	// Claude is happy to return an empty response in response to our Done() call,
+	// and, if so, you'll see something like:
+	// API request failed with status 400 Bad Request
+	// {"type":"error","error":  {"type":"invalid_request_error",
+	// "message":"messages.5: all messages must have non-empty content except for the optional final assistant message"}}
+	// So, we filter out those empty messages.
+	var nonEmptyMessages []llm.Message
+	for _, m := range c.messages {
+		if len(m.Content) > 0 {
+			nonEmptyMessages = append(nonEmptyMessages, m)
+		}
+	}
+
+	mr := &llm.Request{
+		Messages: append(nonEmptyMessages, msg), // not yet committed to keeping msg
+		System:   system,
+		Tools:    c.Tools,
+	}
+	if c.ToolUseOnly {
+		mr.ToolChoice = &llm.ToolChoice{Type: llm.ToolChoiceTypeAny}
+	}
+	return mr
+}
+
+// findTool returns the registered tool with the given name,
+// or an error if no such tool is available in this conversation.
+func (c *Convo) findTool(name string) (*llm.Tool, error) {
+	i := slices.IndexFunc(c.Tools, func(t *llm.Tool) bool { return t.Name == name })
+	if i < 0 {
+		return nil, fmt.Errorf("tool %q not found", name)
+	}
+	return c.Tools[i], nil
+}
+
+// insertMissingToolResults adds error results for tool uses that were requested
+// but not included in the message, which can happen in error paths like "out of budget."
+// We only insert these if there were no tool responses at all, since an incorrect
+// number of tool results would be a programmer error. Mutates inputs.
+func (c *Convo) insertMissingToolResults(mr *llm.Request, msg *llm.Message) {
+	if len(mr.Messages) < 2 {
+		return
+	}
+	// prev is the assistant message whose tool_use requests msg should answer.
+	prev := mr.Messages[len(mr.Messages)-2]
+	var toolUsePrev int
+	for _, c := range prev.Content {
+		if c.Type == llm.ContentTypeToolUse {
+			toolUsePrev++
+		}
+	}
+	if toolUsePrev == 0 {
+		return
+	}
+	var toolUseCurrent int
+	for _, c := range msg.Content {
+		if c.Type == llm.ContentTypeToolResult {
+			toolUseCurrent++
+		}
+	}
+	if toolUseCurrent != 0 {
+		return
+	}
+	// Collect one synthetic error result per requested tool_use, then prepend
+	// them to msg.Content in a single step. (Previously the prepend ran inside
+	// the loop, duplicating earlier synthetic results whenever prev contained
+	// more than one tool_use.)
+	prefix := make([]llm.Content, 0, toolUsePrev)
+	for _, part := range prev.Content {
+		if part.Type != llm.ContentTypeToolUse {
+			continue
+		}
+		prefix = append(prefix, llm.Content{
+			Type:       llm.ContentTypeToolResult,
+			ToolUseID:  part.ID,
+			ToolError:  true,
+			ToolResult: "not executed; retry possible",
+		})
+	}
+	msg.Content = append(prefix, msg.Content...)
+	mr.Messages[len(mr.Messages)-1].Content = msg.Content
+	slog.DebugContext(c.Ctx, "inserted missing tool results")
+}
+
+// SendMessage sends a message to Claude.
+// The conversation records (internally) all messages successfully sent and received.
+// On error, nothing is appended to the history, so the send may be retried.
+func (c *Convo) SendMessage(msg llm.Message) (*llm.Response, error) {
+	// id correlates the OnRequest/OnResponse listener callbacks for this exchange.
+	id := ulid.Make().String()
+	mr := c.messageRequest(msg)
+	var lastMessage *llm.Message
+	if c.PromptCaching {
+		// Mark the final content block with cache_control for this request only...
+		lastMessage = &mr.Messages[len(mr.Messages)-1]
+		if len(lastMessage.Content) > 0 {
+			lastMessage.Content[len(lastMessage.Content)-1].Cache = true
+		}
+	}
+	defer func() {
+		// ...and clear the flag afterwards, since msg is retained in c.messages
+		// and the cache point must move forward on subsequent requests.
+		if lastMessage == nil {
+			return
+		}
+		if len(lastMessage.Content) > 0 {
+			lastMessage.Content[len(lastMessage.Content)-1].Cache = false
+		}
+	}()
+	c.insertMissingToolResults(mr, &msg)
+	c.Listener.OnRequest(c.Ctx, c, id, &msg)
+
+	startTime := time.Now()
+	resp, err := c.Service.Do(c.Ctx, mr)
+	// Stamp timing even on partial results (resp may be non-nil alongside err).
+	if resp != nil {
+		resp.StartTime = &startTime
+		endTime := time.Now()
+		resp.EndTime = &endTime
+	}
+
+	if err != nil {
+		// Notify the listener of the failed exchange with a nil response.
+		c.Listener.OnResponse(c.Ctx, c, id, nil)
+		return nil, err
+	}
+	c.messages = append(c.messages, msg, resp.ToMessage())
+	// Propagate usage to all ancestors (including us).
+	for x := c; x != nil; x = x.Parent {
+		x.usage.Add(resp.Usage)
+	}
+	c.Listener.OnResponse(c.Ctx, c, id, resp)
+	return resp, err
+}
+
+// toolCallInfoKeyType is the unexported context-key type for ToolCallInfo,
+// preventing collisions with keys from other packages.
+type toolCallInfoKeyType string
+
+// toolCallInfoKey is the context key under which ToolCallInfo is stored.
+var toolCallInfoKey toolCallInfoKeyType
+
+// ToolCallInfo identifies a single tool_use call and the Convo it belongs to.
+// It is placed on the context passed to tool Run functions (see ToolResultContents).
+type ToolCallInfo struct {
+	ToolUseID string
+	Convo     *Convo
+}
+
+// ToolCallInfoFromContext returns the ToolCallInfo stored in ctx,
+// or the zero ToolCallInfo if none is present.
+func ToolCallInfoFromContext(ctx context.Context) ToolCallInfo {
+	v := ctx.Value(toolCallInfoKey)
+	i, _ := v.(ToolCallInfo)
+	return i
+}
+
+// ToolResultCancelContents returns an error tool_result for every tool_use
+// requested by resp, marking each as canceled by the user. It is used when
+// the user interrupts the conversation instead of running the tools.
+// Tool-use counters are still incremented for usage accounting.
+// It returns (nil, nil) when resp requested no tool use.
+func (c *Convo) ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error) {
+	if resp.StopReason != llm.StopReasonToolUse {
+		return nil, nil
+	}
+	var toolResults []llm.Content
+	for _, part := range resp.Content {
+		if part.Type != llm.ContentTypeToolUse {
+			continue
+		}
+		c.incrementToolUse(part.ToolName)
+		toolResults = append(toolResults, llm.Content{
+			Type:      llm.ContentTypeToolResult,
+			ToolUseID: part.ID,
+			ToolError: true,
+			// Fixed typo in the message sent back to the model (was "too_use").
+			ToolResult: "user canceled this tool_use",
+		})
+	}
+	return toolResults, nil
+}
+
+// GetID returns the conversation ID.
+// NOTE(review): ID is already an exported field; this accessor presumably
+// exists to satisfy an interface elsewhere — confirm before removing.
+func (c *Convo) GetID() string {
+	return c.ID
+}
+
+// CancelToolUse cancels the in-flight tool call identified by toolUseID,
+// using err as the cancellation cause (a nil err means plain cancellation).
+// It returns an error if no cancel function is registered for that id,
+// e.g. because the call already completed or was never started.
+func (c *Convo) CancelToolUse(toolUseID string, err error) error {
+	c.muToolUseCancel.Lock()
+	defer c.muToolUseCancel.Unlock()
+	cancel, ok := c.toolUseCancel[toolUseID]
+	if !ok {
+		return fmt.Errorf("cannot cancel %s: no cancel function registered for this tool_use_id. All I have is %+v", toolUseID, c.toolUseCancel)
+	}
+	// Remove first so a second CancelToolUse for the same id fails cleanly.
+	delete(c.toolUseCancel, toolUseID)
+	cancel(err)
+	return nil
+}
+
+// newToolUseContext derives a cancellable context for a single tool_use call
+// and registers its cancel function under toolUseID so the call can be
+// canceled individually via CancelToolUse. The returned CancelFunc
+// unregisters and cancels with a nil cause; its CancelToolUse error
+// (already-canceled/completed) is deliberately ignored.
+func (c *Convo) newToolUseContext(ctx context.Context, toolUseID string) (context.Context, context.CancelFunc) {
+	c.muToolUseCancel.Lock()
+	defer c.muToolUseCancel.Unlock()
+	ctx, cancel := context.WithCancelCause(ctx)
+	c.toolUseCancel[toolUseID] = cancel
+	return ctx, func() { c.CancelToolUse(toolUseID, nil) }
+}
+
+// ToolResultContents runs all tool uses requested by the response and returns their results.
+// Tool calls run concurrently, one goroutine each; results are gathered in
+// completion order, not request order. Cancelling ctx will cancel any running
+// tool calls. A tool returning ErrDoNotRespond produces no result at all.
+func (c *Convo) ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, error) {
+	if resp.StopReason != llm.StopReasonToolUse {
+		return nil, nil
+	}
+	// Extract all tool calls from the response, call the tools, and gather the results.
+	var wg sync.WaitGroup
+	// Buffered to len(resp.Content) so no sender can block after wg.Wait.
+	toolResultC := make(chan llm.Content, len(resp.Content))
+	for _, part := range resp.Content {
+		if part.Type != llm.ContentTypeToolUse {
+			continue
+		}
+		c.incrementToolUse(part.ToolName)
+		startTime := time.Now()
+
+		c.Listener.OnToolCall(ctx, c, part.ID, part.ToolName, part.ToolInput, llm.Content{
+			Type:             llm.ContentTypeToolUse,
+			ToolUseID:        part.ID,
+			ToolUseStartTime: &startTime,
+		})
+
+		wg.Add(1)
+		// part is per-iteration (Go >= 1.22; this file already requires 1.22
+		// via math/rand/v2), so capturing it in the goroutine is safe.
+		go func() {
+			defer wg.Done()
+
+			content := llm.Content{
+				Type:             llm.ContentTypeToolResult,
+				ToolUseID:        part.ID,
+				ToolUseStartTime: &startTime,
+			}
+			// sendErr reports a failed tool call to the listener and channel.
+			sendErr := func(err error) {
+				// Record end time
+				endTime := time.Now()
+				content.ToolUseEndTime = &endTime
+
+				content.ToolError = true
+				content.ToolResult = err.Error()
+				c.Listener.OnToolResult(ctx, c, part.ID, part.ToolName, part.ToolInput, content, nil, err)
+				toolResultC <- content
+			}
+			// sendRes reports a successful tool call to the listener and channel.
+			sendRes := func(res string) {
+				// Record end time
+				endTime := time.Now()
+				content.ToolUseEndTime = &endTime
+
+				content.ToolResult = res
+				c.Listener.OnToolResult(ctx, c, part.ID, part.ToolName, part.ToolInput, content, &res, nil)
+				toolResultC <- content
+			}
+
+			tool, err := c.findTool(part.ToolName)
+			if err != nil {
+				sendErr(err)
+				return
+			}
+			// Create a new context for just this tool_use call, and register its
+			// cancel function so that it can be canceled individually.
+			toolUseCtx, cancel := c.newToolUseContext(ctx, part.ID)
+			defer cancel()
+			// TODO: move this into newToolUseContext?
+			toolUseCtx = context.WithValue(toolUseCtx, toolCallInfoKey, ToolCallInfo{ToolUseID: part.ID, Convo: c})
+			toolResult, err := tool.Run(toolUseCtx, part.ToolInput)
+			if errors.Is(err, ErrDoNotRespond) {
+				return
+			}
+			// Prefer the cancellation cause over whatever the tool returned.
+			if toolUseCtx.Err() != nil {
+				sendErr(context.Cause(toolUseCtx))
+				return
+			}
+
+			if err != nil {
+				sendErr(err)
+				return
+			}
+			sendRes(toolResult)
+		}()
+	}
+	wg.Wait()
+	close(toolResultC)
+	var toolResults []llm.Content
+	for toolResult := range toolResultC {
+		toolResults = append(toolResults, toolResult)
+	}
+	// If the overall context was canceled, discard the partial results.
+	if ctx.Err() != nil {
+		return nil, ctx.Err()
+	}
+	return toolResults, nil
+}
+
+// incrementToolUse bumps the per-tool usage counter. The ToolUses map is
+// shared with ancestor conversations (see newUsageWithSharedToolUses),
+// so the shared mutex must be held.
+func (c *Convo) incrementToolUse(name string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.usage.ToolUses[name]++
+}
+
+// CumulativeUsage represents cumulative usage across a Convo, including all sub-conversations.
+type CumulativeUsage struct {
+	StartTime time.Time `json:"start_time"`
+	// NOTE(review): the json tag "messages" does not match the field name;
+	// keep it as-is for serialization compatibility.
+	Responses                uint64         `json:"messages"` // count of responses
+	InputTokens              uint64         `json:"input_tokens"`
+	OutputTokens             uint64         `json:"output_tokens"`
+	CacheReadInputTokens     uint64         `json:"cache_read_input_tokens"`
+	CacheCreationInputTokens uint64         `json:"cache_creation_input_tokens"`
+	TotalCostUSD             float64        `json:"total_cost_usd"`
+	ToolUses                 map[string]int `json:"tool_uses"` // tool name -> number of uses
+}
+
+// newUsage returns a fresh CumulativeUsage with the clock started now.
+func newUsage() *CumulativeUsage {
+	return &CumulativeUsage{ToolUses: make(map[string]int), StartTime: time.Now()}
+}
+
+// newUsageWithSharedToolUses returns a fresh CumulativeUsage whose ToolUses
+// map is shared (aliased) with the parent's, so tool-use counts accumulate
+// across the whole conversation tree; all other fields start at zero.
+func newUsageWithSharedToolUses(parent *CumulativeUsage) *CumulativeUsage {
+	return &CumulativeUsage{ToolUses: parent.ToolUses, StartTime: time.Now()}
+}
+
+// Clone returns a deep copy of u: the ToolUses map is copied,
+// so the result can be read without holding any lock.
+func (u *CumulativeUsage) Clone() CumulativeUsage {
+	v := *u
+	v.ToolUses = maps.Clone(u.ToolUses)
+	return v
+}
+
+// CumulativeUsage returns a consistent snapshot of this conversation's usage
+// (including sub-conversations). Safe to call on a nil *Convo, which yields
+// the zero value.
+func (c *Convo) CumulativeUsage() CumulativeUsage {
+	if c == nil {
+		return CumulativeUsage{}
+	}
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.usage.Clone()
+}
+
+// WallTime reports how long this usage has been accumulating,
+// measured from StartTime to now.
+func (u *CumulativeUsage) WallTime() time.Duration {
+	return time.Since(u.StartTime)
+}
+
+// DollarsPerHour reports the average spend rate over the usage's lifetime.
+// It returns 0 for freshly-started usage rather than a huge or infinite rate.
+func (u *CumulativeUsage) DollarsPerHour() float64 {
+	hours := u.WallTime().Hours()
+	// Prevent division by very small numbers that could cause issues
+	if hours < 1e-6 {
+		return 0
+	}
+	return u.TotalCostUSD / hours
+}
+
+// Add folds one response's usage into the running totals and counts the
+// response. Callers must hold the mutex guarding u (see Convo.mu).
+func (u *CumulativeUsage) Add(usage llm.Usage) {
+	u.Responses++
+	u.InputTokens += usage.InputTokens
+	u.OutputTokens += usage.OutputTokens
+	u.CacheReadInputTokens += usage.CacheReadInputTokens
+	u.CacheCreationInputTokens += usage.CacheCreationInputTokens
+	u.TotalCostUSD += usage.CostUSD
+}
+
+// TotalInputTokens returns the grand total cumulative input tokens in u,
+// counting fresh, cache-read, and cache-creation input tokens.
+func (u *CumulativeUsage) TotalInputTokens() uint64 {
+	return u.InputTokens + u.CacheReadInputTokens + u.CacheCreationInputTokens
+}
+
+// Attr returns the cumulative usage as a slog.Attr with key "usage".
+func (u CumulativeUsage) Attr() slog.Attr {
+	elapsed := time.Since(u.StartTime)
+	return slog.Group("usage",
+		slog.Duration("wall_time", elapsed),
+		slog.Uint64("responses", u.Responses),
+		slog.Uint64("input_tokens", u.InputTokens),
+		slog.Uint64("output_tokens", u.OutputTokens),
+		slog.Uint64("cache_read_input_tokens", u.CacheReadInputTokens),
+		slog.Uint64("cache_creation_input_tokens", u.CacheCreationInputTokens),
+		slog.Float64("total_cost_usd", u.TotalCostUSD),
+		slog.Float64("dollars_per_hour", u.TotalCostUSD/elapsed.Hours()),
+		slog.Any("tool_uses", maps.Clone(u.ToolUses)),
+	)
+}
+
+// A Budget represents the maximum amount of resources that may be spent on a conversation.
+// Note that the default (zero) budget is unlimited.
+// Budgets are checked, not enforced: see Convo.OverBudget.
+type Budget struct {
+	MaxResponses uint64        // if > 0, max number of iterations (=responses)
+	MaxDollars   float64       // if > 0, max dollars that may be spent
+	MaxWallTime  time.Duration // if > 0, max wall time that may be spent
+}
+
+// OverBudget returns an error if the convo (or any of its parents) has exceeded its budget.
+// It stops at the first ancestor that is over budget.
+// TODO: document parent vs sub budgets, multiple errors, etc, once we know the desired behavior.
+func (c *Convo) OverBudget() error {
+	for x := c; x != nil; x = x.Parent {
+		if err := x.overBudget(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// ResetBudget sets the budget to the passed-in budget and
+// adjusts it by what's been used so far, so the new limits
+// apply only to usage from this point on.
+func (c *Convo) ResetBudget(budget Budget) {
+	// Snapshot usage once under the lock. (Previously this called
+	// CumulativeUsage() — which locks the shared mutex — twice, and read
+	// c.usage.WallTime() without the lock at all.)
+	usage := c.CumulativeUsage()
+	c.Budget = budget
+	if c.Budget.MaxDollars > 0 {
+		c.Budget.MaxDollars += usage.TotalCostUSD
+	}
+	if c.Budget.MaxResponses > 0 {
+		c.Budget.MaxResponses += usage.Responses
+	}
+	if c.Budget.MaxWallTime > 0 {
+		c.Budget.MaxWallTime += usage.WallTime()
+	}
+}
+
+// overBudget checks this conversation's own budget (not its ancestors')
+// against a usage snapshot, joining one error per exceeded dimension.
+func (c *Convo) overBudget() error {
+	usage := c.CumulativeUsage()
+	// TODO: stop before we exceed the budget instead of after?
+	// Top priority is money, then time, then response count.
+	var err error
+	cont := "Continuing to chat will reset the budget."
+	if c.Budget.MaxDollars > 0 && usage.TotalCostUSD >= c.Budget.MaxDollars {
+		err = errors.Join(err, fmt.Errorf("$%.2f spent, budget is $%.2f. %s", usage.TotalCostUSD, c.Budget.MaxDollars, cont))
+	}
+	if c.Budget.MaxWallTime > 0 && usage.WallTime() >= c.Budget.MaxWallTime {
+		err = errors.Join(err, fmt.Errorf("%v elapsed, budget is %v. %s", usage.WallTime().Truncate(time.Second), c.Budget.MaxWallTime.Truncate(time.Second), cont))
+	}
+	if c.Budget.MaxResponses > 0 && usage.Responses >= c.Budget.MaxResponses {
+		err = errors.Join(err, fmt.Errorf("%d responses received, budget is %d. %s", usage.Responses, c.Budget.MaxResponses, cont))
+	}
+	return err
+}