blob: 2aea24ec2fcb7b33f1e3f82df09578c8848bdcf9 [file] [log] [blame]
// Package llm provides a unified interface for interacting with LLMs.
2package llm
3
4import (
5 "context"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "strings"
10 "time"
11)
12
13type Service interface {
14 // Do sends a request to an LLM.
15 Do(context.Context, *Request) (*Response, error)
Philip Zeyligerb8a8f352025-06-02 07:39:37 -070016 // TokenContextWindow returns the maximum token context window size for this service
17 TokenContextWindow() int
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070018}
19
// MustSchema validates that schema is a valid JSON schema and returns it as a json.RawMessage.
// Leading and trailing whitespace is trimmed before validation and is not part of the result.
// It panics if the schema is invalid.
func MustSchema(schema string) json.RawMessage {
	// TODO: validate schema, for now just make sure it's valid JSON
	schema = strings.TrimSpace(schema)
	raw := json.RawMessage(schema) // named raw, not bytes, to avoid shadowing the stdlib package name
	if !json.Valid(raw) {
		panic("invalid JSON schema: " + schema)
	}
	return raw
}
31
Josh Bleecher Snyder74d690e2025-05-14 18:16:03 -070032func EmptySchema() json.RawMessage {
33 return MustSchema(`{"type": "object", "properties": {}}`)
34}
35
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070036type Request struct {
37 Messages []Message
38 ToolChoice *ToolChoice
39 Tools []*Tool
40 System []SystemContent
41}
42
43// Message represents a message in the conversation.
44type Message struct {
45 Role MessageRole
46 Content []Content
47 ToolUse *ToolUse // use to control whether/which tool to use
48}
49
// ToolUse represents a tool use in the message content.
type ToolUse struct {
	ID   string
	Name string
}
55
56type ToolChoice struct {
57 Type ToolChoiceType
58 Name string
59}
60
// SystemContent is one item of a Request's System content.
//
// NOTE(review): this file does not define the meaning of Type or Cache;
// confirm against the Service implementations that consume them.
type SystemContent struct {
	Text  string
	Type  string
	Cache bool
}
66
67// Tool represents a tool available to an LLM.
68type Tool struct {
69 Name string
70 // Type is used by the text editor tool; see
71 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
72 Type string
73 Description string
74 InputSchema json.RawMessage
Sean McCullough021557a2025-05-05 23:20:53 +000075 // EndsTurn indicates that this tool should cause the model to end its turn when used
76 EndsTurn bool
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070077
78 // The Run function is automatically called when the tool is used.
79 // Run functions may be called concurrently with each other and themselves.
80 // The input to Run function is the input to the tool, as provided by Claude, in compliance with the input schema.
81 // The outputs from Run will be sent back to Claude.
82 // If you do not want to respond to the tool call request from Claude, return ErrDoNotRespond.
83 // ctx contains extra (rarely used) tool call information; retrieve it with ToolCallInfoFromContext.
Philip Zeyliger72252cb2025-05-10 17:00:08 -070084 Run func(ctx context.Context, input json.RawMessage) ([]Content, error) `json:"-"`
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070085}
86
87type Content struct {
88 ID string
89 Type ContentType
90 Text string
91
Philip Zeyliger72252cb2025-05-10 17:00:08 -070092 // Media type for image content
93 MediaType string
94
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070095 // for thinking
96 Thinking string
97 Data string
98 Signature string
99
100 // for tool_use
101 ToolName string
102 ToolInput json.RawMessage
103
104 // for tool_result
105 ToolUseID string
106 ToolError bool
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700107 ToolResult []Content
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700108
109 // timing information for tool_result; added externally; not sent to the LLM
110 ToolUseStartTime *time.Time
111 ToolUseEndTime *time.Time
112
113 Cache bool
114}
115
116func StringContent(s string) Content {
117 return Content{Type: ContentTypeText, Text: s}
118}
119
120// ContentsAttr returns contents as a slog.Attr.
121// It is meant for logging.
122func ContentsAttr(contents []Content) slog.Attr {
123 var contentAttrs []any // slog.Attr
124 for _, content := range contents {
125 var attrs []any // slog.Attr
126 switch content.Type {
127 case ContentTypeText:
128 attrs = append(attrs, slog.String("text", content.Text))
129 case ContentTypeToolUse:
130 attrs = append(attrs, slog.String("tool_name", content.ToolName))
131 attrs = append(attrs, slog.String("tool_input", string(content.ToolInput)))
132 case ContentTypeToolResult:
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700133 attrs = append(attrs, slog.Any("tool_result", content.ToolResult))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700134 attrs = append(attrs, slog.Bool("tool_error", content.ToolError))
135 case ContentTypeThinking:
136 attrs = append(attrs, slog.String("thinking", content.Text))
137 default:
138 attrs = append(attrs, slog.String("unknown_content_type", content.Type.String()))
139 attrs = append(attrs, slog.Any("text", content)) // just log it all raw, better to have too much than not enough
140 }
141 contentAttrs = append(contentAttrs, slog.Group(content.ID, attrs...))
142 }
143 return slog.Group("contents", contentAttrs...)
144}
145
// Enumeration types used throughout the package.
// Their String methods are generated into llm_string.go by the go:generate
// directive below.
type (
	// MessageRole is the type of Message.Role and Response.Role.
	MessageRole int
	// ContentType is the type of Content.Type.
	ContentType int
	// ToolChoiceType is the type of ToolChoice.Type.
	ToolChoiceType int
	// StopReason is the type of Response.StopReason.
	StopReason int
)

//go:generate go tool golang.org/x/tools/cmd/stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason -output=llm_string.go
154
155const (
156 MessageRoleUser MessageRole = iota
157 MessageRoleAssistant
158
159 ContentTypeText ContentType = iota
160 ContentTypeThinking
161 ContentTypeRedactedThinking
162 ContentTypeToolUse
163 ContentTypeToolResult
164
165 ToolChoiceTypeAuto ToolChoiceType = iota // default
166 ToolChoiceTypeAny // any tool, but must use one
167 ToolChoiceTypeNone // no tools allowed
168 ToolChoiceTypeTool // must use the tool specified in the Name field
169
170 StopReasonStopSequence StopReason = iota
171 StopReasonMaxTokens
172 StopReasonEndTurn
173 StopReasonToolUse
Josh Bleecher Snyder0e8073a2025-05-22 21:04:51 -0700174 StopReasonRefusal
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700175)
176
177type Response struct {
178 ID string
179 Type string
180 Role MessageRole
181 Model string
182 Content []Content
183 StopReason StopReason
184 StopSequence *string
185 Usage Usage
186 StartTime *time.Time
187 EndTime *time.Time
188}
189
190func (m *Response) ToMessage() Message {
191 return Message{
192 Role: m.Role,
193 Content: m.Content,
194 }
195}
196
// Usage represents the billing and rate-limit usage.
// Most LLM structs do not have JSON tags, to avoid accidental direct use in specific providers.
// However, the front-end uses this struct, and it relies on its JSON serialization.
// Do NOT use this struct directly when implementing an llm.Service.
type Usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}
208
209func (u *Usage) Add(other Usage) {
210 u.InputTokens += other.InputTokens
211 u.CacheCreationInputTokens += other.CacheCreationInputTokens
212 u.CacheReadInputTokens += other.CacheReadInputTokens
213 u.OutputTokens += other.OutputTokens
214 u.CostUSD += other.CostUSD
215}
216
217func (u *Usage) String() string {
218 return fmt.Sprintf("in: %d, out: %d", u.InputTokens, u.OutputTokens)
219}
220
221func (u *Usage) IsZero() bool {
222 return *u == Usage{}
223}
224
225func (u *Usage) Attr() slog.Attr {
226 return slog.Group("usage",
227 slog.Uint64("input_tokens", u.InputTokens),
228 slog.Uint64("output_tokens", u.OutputTokens),
229 slog.Uint64("cache_creation_input_tokens", u.CacheCreationInputTokens),
230 slog.Uint64("cache_read_input_tokens", u.CacheReadInputTokens),
231 slog.Float64("cost_usd", u.CostUSD),
232 )
233}
234
235// UserStringMessage creates a user message with a single text content item.
236func UserStringMessage(text string) Message {
237 return Message{
238 Role: MessageRoleUser,
239 Content: []Content{StringContent(text)},
240 }
241}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700242
243// TextContent creates a simple text content for tool results.
244// This is a helper function to create the most common type of tool result content.
245func TextContent(text string) []Content {
246 return []Content{{
247 Type: ContentTypeText,
248 Text: text,
249 }}
250}
251
252// ImageContent creates an image content for tool results.
253// MediaType should be "image/jpeg" or "image/png"
254func ImageContent(text string, mediaType string, base64Data string) []Content {
255 return []Content{{
256 Type: ContentTypeText,
257 Text: text,
258 MediaType: mediaType,
259 Data: base64Data,
260 }}
261}