all: support openai-compatible models
The support is rather minimal at this point:
Only hard-coded models, only -unsafe, only -skabandaddr="".
The "shared" LLM package is strongly Claude-flavored.
We can fix all of this and more over time, if we are inspired to.
(Maybe we'll switch to https://github.com/maruel/genai?)
The goal for now is to get the rough structure in place.
I've rebased and rebuilt this more times than I care to remember.
diff --git a/loop/agent.go b/loop/agent.go
index 3076385..960bf5a 100644
--- a/loop/agent.go
+++ b/loop/agent.go
@@ -17,10 +17,11 @@
"sync"
"time"
- "sketch.dev/ant"
"sketch.dev/browser"
"sketch.dev/claudetool"
"sketch.dev/claudetool/bashkit"
+ "sketch.dev/llm"
+ "sketch.dev/llm/conversation"
)
const (
@@ -64,8 +65,8 @@
// Returns the current number of messages in the history
MessageCount() int
- TotalUsage() ant.CumulativeUsage
- OriginalBudget() ant.Budget
+ TotalUsage() conversation.CumulativeUsage
+ OriginalBudget() conversation.Budget
WorkingDir() string
@@ -150,7 +151,7 @@
Timestamp time.Time `json:"timestamp"`
ConversationID string `json:"conversation_id"`
ParentConversationID *string `json:"parent_conversation_id,omitempty"`
- Usage *ant.Usage `json:"usage,omitempty"`
+ Usage *llm.Usage `json:"usage,omitempty"`
// Message timing information
StartTime *time.Time `json:"start_time,omitempty"`
@@ -164,7 +165,7 @@
}
// SetConvo sets m.ConversationID and m.ParentConversationID based on convo.
-func (m *AgentMessage) SetConvo(convo *ant.Convo) {
+func (m *AgentMessage) SetConvo(convo *conversation.Convo) {
if convo == nil {
m.ConversationID = ""
m.ParentConversationID = nil
@@ -262,16 +263,16 @@
// ConvoInterface defines the interface for conversation interactions
type ConvoInterface interface {
- CumulativeUsage() ant.CumulativeUsage
- ResetBudget(ant.Budget)
+ CumulativeUsage() conversation.CumulativeUsage
+ ResetBudget(conversation.Budget)
OverBudget() error
- SendMessage(message ant.Message) (*ant.MessageResponse, error)
- SendUserTextMessage(s string, otherContents ...ant.Content) (*ant.MessageResponse, error)
+ SendMessage(message llm.Message) (*llm.Response, error)
+ SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error)
GetID() string
- ToolResultContents(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
- ToolResultCancelContents(resp *ant.MessageResponse) ([]ant.Content, error)
+ ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, error)
+ ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error)
CancelToolUse(toolUseID string, cause error) error
- SubConvoWithHistory() *ant.Convo
+ SubConvoWithHistory() *conversation.Convo
}
type Agent struct {
@@ -287,7 +288,7 @@
outsideHTTP string // base address of the outside webserver (only when under docker)
ready chan struct{} // closed when the agent is initialized (only when under docker)
startedAt time.Time
- originalBudget ant.Budget
+ originalBudget conversation.Budget
title string
branchName string
codereview *claudetool.CodeReviewer
@@ -531,7 +532,7 @@
}
-// OnToolCall implements ant.Listener and tracks the start of a tool call.
+// OnToolCall implements conversation.Listener and tracks the start of a tool call.
-func (a *Agent) OnToolCall(ctx context.Context, convo *ant.Convo, id string, toolName string, toolInput json.RawMessage, content ant.Content) {
+func (a *Agent) OnToolCall(ctx context.Context, convo *conversation.Convo, id string, toolName string, toolInput json.RawMessage, content llm.Content) {
// Track the tool call
a.mu.Lock()
a.outstandingToolCalls[id] = toolName
@@ -539,7 +540,7 @@
}
-// OnToolResult implements ant.Listener.
+// OnToolResult implements conversation.Listener.
-func (a *Agent) OnToolResult(ctx context.Context, convo *ant.Convo, toolID string, toolName string, toolInput json.RawMessage, content ant.Content, result *string, err error) {
+func (a *Agent) OnToolResult(ctx context.Context, convo *conversation.Convo, toolID string, toolName string, toolInput json.RawMessage, content llm.Content, result *string, err error) {
// Remove the tool call from outstanding calls
a.mu.Lock()
delete(a.outstandingToolCalls, toolID)
@@ -553,13 +554,13 @@
ToolName: toolName,
ToolInput: string(toolInput),
ToolCallId: content.ToolUseID,
- StartTime: content.StartTime,
- EndTime: content.EndTime,
+ StartTime: content.ToolUseStartTime,
+ EndTime: content.ToolUseEndTime,
}
// Calculate the elapsed time if both start and end times are set
- if content.StartTime != nil && content.EndTime != nil {
- elapsed := content.EndTime.Sub(*content.StartTime)
+ if content.ToolUseStartTime != nil && content.ToolUseEndTime != nil {
+ elapsed := content.ToolUseEndTime.Sub(*content.ToolUseStartTime)
m.Elapsed = &elapsed
}
@@ -568,18 +569,18 @@
}
-// OnRequest implements ant.Listener.
+// OnRequest implements conversation.Listener.
-func (a *Agent) OnRequest(ctx context.Context, convo *ant.Convo, id string, msg *ant.Message) {
+func (a *Agent) OnRequest(ctx context.Context, convo *conversation.Convo, id string, msg *llm.Message) {
a.mu.Lock()
defer a.mu.Unlock()
a.outstandingLLMCalls[id] = struct{}{}
// We already get tool results from the above. We send user messages to the outbox in the agent loop.
}
-// OnResponse implements ant.Listener. Responses contain messages from the LLM
+// OnResponse implements conversation.Listener. Responses contain messages from the LLM
// that need to be displayed (as well as tool calls that we send along when
// they're done). (It would be reasonable to also mention tool calls when they're
// started, but we don't do that yet.)
-func (a *Agent) OnResponse(ctx context.Context, convo *ant.Convo, id string, resp *ant.MessageResponse) {
+func (a *Agent) OnResponse(ctx context.Context, convo *conversation.Convo, id string, resp *llm.Response) {
// Remove the LLM call from outstanding calls
a.mu.Lock()
delete(a.outstandingLLMCalls, id)
@@ -597,7 +598,7 @@
}
endOfTurn := false
- if resp.StopReason != ant.StopReasonToolUse && convo.Parent == nil {
+ if resp.StopReason != llm.StopReasonToolUse && convo.Parent == nil {
endOfTurn = true
}
m := AgentMessage{
@@ -610,10 +611,10 @@
}
// Extract any tool calls from the response
- if resp.StopReason == ant.StopReasonToolUse {
+ if resp.StopReason == llm.StopReasonToolUse {
var toolCalls []ToolCall
for _, part := range resp.Content {
- if part.Type == ant.ContentTypeToolUse {
+ if part.Type == llm.ContentTypeToolUse {
toolCalls = append(toolCalls, ToolCall{
Name: part.ToolName,
Input: string(part.ToolInput),
@@ -653,17 +654,15 @@
return slices.Clone(a.history[start:end])
}
-func (a *Agent) OriginalBudget() ant.Budget {
+func (a *Agent) OriginalBudget() conversation.Budget {
return a.originalBudget
}
// AgentConfig contains configuration for creating a new Agent.
type AgentConfig struct {
Context context.Context
- AntURL string
- APIKey string
- HTTPC *http.Client
- Budget ant.Budget
+ Service llm.Service
+ Budget conversation.Budget
GitUsername string
GitEmail string
SessionID string
@@ -778,15 +777,9 @@
// initConvo initializes the conversation.
// It must not be called until all agent fields are initialized,
// particularly workingDir and git.
-func (a *Agent) initConvo() *ant.Convo {
+func (a *Agent) initConvo() *conversation.Convo {
ctx := a.config.Context
- convo := ant.NewConvo(ctx, a.config.APIKey)
- if a.config.HTTPC != nil {
- convo.HTTPC = a.config.HTTPC
- }
- if a.config.AntURL != "" {
- convo.URL = a.config.AntURL
- }
+ convo := conversation.New(ctx, a.config.Service)
convo.PromptCaching = true
convo.Budget = a.config.Budget
@@ -832,7 +825,7 @@
// Register all tools with the conversation
// When adding, removing, or modifying tools here, double-check that the termui tool display
// template in termui/termui.go has pretty-printing support for all tools.
- convo.Tools = []*ant.Tool{
+ convo.Tools = []*llm.Tool{
bashTool, claudetool.Keyword,
claudetool.Think, a.titleTool(), makeDoneTool(a.codereview, a.config.GitUsername, a.config.GitEmail),
a.codereview.Tool(),
@@ -863,8 +856,8 @@
return false
}
-func (a *Agent) titleTool() *ant.Tool {
- title := &ant.Tool{
+func (a *Agent) titleTool() *llm.Tool {
+ title := &llm.Tool{
Name: "title",
Description: `Sets the conversation title and creates a git branch for tracking work. MANDATORY: You must use this tool before making any git commits.`,
InputSchema: json.RawMessage(`{
@@ -990,20 +983,20 @@
}
}
-func (a *Agent) GatherMessages(ctx context.Context, block bool) ([]ant.Content, error) {
- var m []ant.Content
+func (a *Agent) GatherMessages(ctx context.Context, block bool) ([]llm.Content, error) {
+ var m []llm.Content
if block {
select {
case <-ctx.Done():
return m, ctx.Err()
case msg := <-a.inbox:
- m = append(m, ant.StringContent(msg))
+ m = append(m, llm.StringContent(msg))
}
}
for {
select {
case msg := <-a.inbox:
- m = append(m, ant.StringContent(msg))
+ m = append(m, llm.StringContent(msg))
default:
return m, nil
}
@@ -1052,7 +1045,7 @@
}
// If the model is not requesting to use a tool, we're done
- if resp.StopReason != ant.StopReasonToolUse {
+ if resp.StopReason != llm.StopReasonToolUse {
a.stateMachine.Transition(ctx, StateEndOfTurn, "LLM completed response, ending turn")
break
}
@@ -1078,7 +1071,7 @@
}
// processUserMessage waits for user messages and sends them to the model
-func (a *Agent) processUserMessage(ctx context.Context) (*ant.MessageResponse, error) {
+func (a *Agent) processUserMessage(ctx context.Context) (*llm.Response, error) {
// Wait for at least one message from the user
msgs, err := a.GatherMessages(ctx, true)
if err != nil { // e.g. the context was canceled while blocking in GatherMessages
@@ -1086,8 +1079,8 @@
return nil, err
}
- userMessage := ant.Message{
- Role: ant.MessageRoleUser,
+ userMessage := llm.Message{
+ Role: llm.MessageRoleUser,
Content: msgs,
}
@@ -1109,8 +1102,8 @@
}
// handleToolExecution processes a tool use request from the model
-func (a *Agent) handleToolExecution(ctx context.Context, resp *ant.MessageResponse) (bool, *ant.MessageResponse) {
- var results []ant.Content
+func (a *Agent) handleToolExecution(ctx context.Context, resp *llm.Response) (bool, *llm.Response) {
+ var results []llm.Content
cancelled := false
// Transition to checking for cancellation state
@@ -1200,7 +1193,7 @@
}
// continueTurnWithToolResults continues the conversation with tool results
-func (a *Agent) continueTurnWithToolResults(ctx context.Context, results []ant.Content, autoqualityMessages []string, cancelled bool) (bool, *ant.MessageResponse) {
+func (a *Agent) continueTurnWithToolResults(ctx context.Context, results []llm.Content, autoqualityMessages []string, cancelled bool) (bool, *llm.Response) {
// Get any messages the user sent while tools were executing
a.stateMachine.Transition(ctx, StateGatheringAdditionalMessages, "Gathering additional user messages")
msgs, err := a.GatherMessages(ctx, false)
@@ -1211,19 +1204,19 @@
// Inject any auto-generated messages from quality checks
for _, msg := range autoqualityMessages {
- msgs = append(msgs, ant.StringContent(msg))
+ msgs = append(msgs, llm.StringContent(msg))
}
// Handle cancellation by appending a message about it
if cancelled {
- msgs = append(msgs, ant.StringContent(cancelToolUseMessage))
+ msgs = append(msgs, llm.StringContent(cancelToolUseMessage))
// EndOfTurn is false here so that the client of this agent keeps processing
// further messages; the conversation is not over.
a.pushToOutbox(ctx, AgentMessage{Type: ErrorMessageType, Content: userCancelMessage, EndOfTurn: false})
} else if err := a.convo.OverBudget(); err != nil {
// Handle budget issues by appending a message about it
budgetMsg := "We've exceeded our budget. Please ask the user to confirm before continuing by ending the turn."
- msgs = append(msgs, ant.StringContent(budgetMsg))
+ msgs = append(msgs, llm.StringContent(budgetMsg))
a.pushToOutbox(ctx, budgetMessage(fmt.Errorf("warning: %w (ask to keep trying, if you'd like)", err)))
}
@@ -1232,8 +1225,8 @@
// Send the combined message to continue the conversation
a.stateMachine.Transition(ctx, StateSendingToolResults, "Sending tool results back to LLM")
- resp, err := a.convo.SendMessage(ant.Message{
- Role: ant.MessageRoleUser,
+ resp, err := a.convo.SendMessage(llm.Message{
+ Role: llm.MessageRoleUser,
Content: results,
})
if err != nil {
@@ -1264,11 +1257,11 @@
return nil
}
-func collectTextContent(msg *ant.MessageResponse) string {
+func collectTextContent(msg *llm.Response) string {
// Collect all text content
var allText strings.Builder
for _, content := range msg.Content {
- if content.Type == ant.ContentTypeText && content.Text != "" {
+ if content.Type == llm.ContentTypeText && content.Text != "" {
if allText.Len() > 0 {
allText.WriteString("\n\n")
}
@@ -1278,7 +1271,7 @@
return allText.String()
}
-func (a *Agent) TotalUsage() ant.CumulativeUsage {
+func (a *Agent) TotalUsage() conversation.CumulativeUsage {
a.mu.Lock()
defer a.mu.Unlock()
return a.convo.CumulativeUsage()
@@ -1604,7 +1597,7 @@
Reply with ONLY the reprompt text.
`
- userMessage := ant.UserStringMessage(msg)
+ userMessage := llm.UserStringMessage(msg)
// By doing this in a subconversation, the agent doesn't call tools (because
// there aren't any), and there's not a concurrency risk with on-going other
// outstanding conversations.