blob: 6a32a745bc341c08acf322d1ebca1a93b2234fce [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070012 "strings"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070013 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "sketch.dev/llm"
17)
18
// Defaults and per-provider endpoint configuration for OpenAI-compatible
// chat completion APIs.
const (
	// DefaultMaxTokens is used when Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for each supported OpenAI-compatible provider.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1"
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"
	MoonshotURL  = "https://api.moonshot.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
	MoonshotAPIKeyEnv  = "MOONSHOT_API_KEY"
)
38
// Model describes one LLM offering: how users refer to it, how the provider
// names it, where to reach it, and which behavioral quirks apply.
type Model struct {
	UserName           string // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName          string // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL                string // base URL of the provider's OpenAI-compatible endpoint
	APIKeyEnv          string // environment variable name for the API key
	IsReasoningModel   bool   // whether this model is a reasoning model (e.g. O3, O4-mini); selects MaxCompletionTokens over MaxTokens
	UseSimplifiedPatch bool   // whether to use the simplified patch input schema; defaults to false
}
47
// Known model definitions. DefaultModel is used when Service.Model is the
// zero value. Every model intended for user selection must also appear in
// ModelsRegistry below.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// Gemini 2.5 Pro has tiered pricing: input/output rates increase for
		// prompts over 200k tokens, and cached tokens are billed separately
		// (including a per-hour storage component). Those tiers are
		// deliberately not modeled here.
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherDeepseekR1 = Model{
		UserName:  "together-deepseek-r1",
		ModelName: "deepseek-ai/DeepSeek-R1",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// No APIKeyEnv: a local llama.cpp server requires no API key.
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MoonshotKimiK2 = Model{
		UserName: "moonshot-kimi-k2",
		// NOTE(review): "moonshot-v1-auto" is Moonshot's generic auto-routing
		// model name, not a kimi-k2-specific identifier — confirm this is
		// intentional.
		ModelName: "moonshot-v1-auto",
		URL:       MoonshotURL,
		APIKeyEnv: MoonshotAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	DevstralSmall = Model{
		UserName:  "devstral-small",
		ModelName: "devstral-small-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	Qwen3CoderFireworks = Model{
		UserName:           "qwen3-coder-fireworks",
		ModelName:          "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
		URL:                FireworksURL,
		APIKeyEnv:          FireworksAPIKeyEnv,
		UseSimplifiedPatch: true,
	}

	// Qwen is a skaband-specific model name for Qwen3-Coder.
	// Provider details (URL and APIKeyEnv) are handled by skaband.
	Qwen = Model{
		UserName:           "qwen",
		ModelName:          "qwen", // skaband will map this to the actual provider model
		UseSimplifiedPatch: true,
	}
)
229
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	ModelURL  string       // optional, overrides Model.URL
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
}

// Compile-time assertion that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)
243
// ModelsRegistry is a registry of all known models with their user-friendly names.
// ListModels and ModelByUserName operate over this slice, in this order.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherDeepseekR1,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	MoonshotKimiK2,
	FireworksLlama4Maverick,
	MistralMedium,
	DevstralSmall,
	Qwen3CoderFireworks,
	Qwen,
}
271
272// ListModels returns a list of all available models with their user-friendly names.
273func ListModels() []string {
274 var names []string
275 for _, model := range ModelsRegistry {
276 if model.UserName != "" {
277 names = append(names, model.UserName)
278 }
279 }
280 return names
281}
282
// ModelByUserName returns a model by its user-friendly name.
// Returns the zero Model (detectable via Model.IsZero) if no model with the
// given name is found. (Note: the function returns a value, not a pointer, so
// it can never return nil.)
func ModelByUserName(name string) Model {
	for _, model := range ModelsRegistry {
		if model.UserName == name {
			return model
		}
	}
	return Model{}
}
293
294func (m Model) IsZero() bool {
295 return m == Model{}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700296}
297
// Translation tables between llm-package enums and the wire strings used by
// the OpenAI chat completions API.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
321
322// fromLLMContent converts llm.Content to the format expected by OpenAI.
323func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
324 switch c.Type {
325 case llm.ContentTypeText:
326 return c.Text, nil
327 case llm.ContentTypeToolUse:
328 // For OpenAI, tool use is sent as a null content with tool_calls in the message
329 return "", []openai.ToolCall{
330 {
331 Type: openai.ToolTypeFunction,
332 ID: c.ID, // Use the content ID if provided
333 Function: openai.FunctionCall{
334 Name: c.ToolName,
335 Arguments: string(c.ToolInput),
336 },
337 },
338 }
339 case llm.ContentTypeToolResult:
340 // Tool results in OpenAI are sent as a separate message with tool_call_id
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700341 // OpenAI doesn't support multiple content items or images in tool results
342 // Combine all text content into a single string
343 var resultText string
344 if len(c.ToolResult) > 0 {
345 // Collect all text from content objects
346 texts := make([]string, 0, len(c.ToolResult))
347 for _, result := range c.ToolResult {
348 if result.Text != "" {
349 texts = append(texts, result.Text)
350 }
351 }
352 resultText = strings.Join(texts, "\n")
353 }
354 return resultText, nil
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700355 default:
356 // For thinking or other types, convert to text
357 return c.Text, nil
358 }
359}
360
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message may expand to several OpenAI messages: OpenAI requires
// each tool result to be its own message with role "tool" and a
// tool_call_id, while all remaining (non-tool-result) content collapses into
// a single message in the original role. Tool-result messages are emitted
// before the regular-content message.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	var messages []openai.ChatCompletionMessage

	// Partition the content: tool results vs. everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, one role="tool" message per result.
	for _, tr := range toolResults {
		// OpenAI tool results carry a single string, so flatten the
		// tool-result content objects into newline-joined text, skipping
		// whitespace-only parts.
		var texts []string
		for _, result := range tr.ToolResult {
			if strings.TrimSpace(result.Text) != "" {
				texts = append(texts, result.Text)
			}
		}
		toolResultContent := strings.Join(texts, "\n")

		// OpenAI doesn't have an explicit error field for tool results, so add it directly to the content.
		if tr.ToolError {
			if toolResultContent != "" {
				toolResultContent = "error: " + toolResultContent
			} else {
				toolResultContent = "error: tool execution failed"
			}
		}

		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Then collapse the regular content into one message in the original role.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Assistant messages may mix text with tool calls; gather both.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text parts with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
438
439// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
440func fromLLMToolChoice(tc *llm.ToolChoice) any {
441 if tc == nil {
442 return nil
443 }
444
445 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
446 return openai.ToolChoice{
447 Type: openai.ToolTypeFunction,
448 Function: openai.ToolFunction{
449 Name: tc.Name,
450 },
451 }
452 }
453
454 // For non-specific tool choice, just use the string
455 return fromLLMToolChoiceType[tc.Type]
456}
457
458// fromLLMTool converts llm.Tool to the format expected by OpenAI.
459func fromLLMTool(t *llm.Tool) openai.Tool {
460 return openai.Tool{
461 Type: openai.ToolTypeFunction,
462 Function: &openai.FunctionDefinition{
463 Name: t.Name,
464 Description: t.Description,
465 Parameters: t.InputSchema,
466 },
467 }
468}
469
470// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
471func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
472 if len(systemContent) == 0 {
473 return nil
474 }
475
476 // Combine all system content into a single system message
477 var systemText string
478 for i, content := range systemContent {
479 if i > 0 && systemText != "" && content.Text != "" {
480 systemText += "\n"
481 }
482 systemText += content.Text
483 }
484
485 if systemText == "" {
486 return nil
487 }
488
489 return []openai.ChatCompletionMessage{
490 {
491 Role: "system",
492 Content: systemText,
493 },
494 }
495}
496
497// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
498func toRawLLMContent(content string) llm.Content {
499 return llm.Content{
500 Type: llm.ContentTypeText,
501 Text: content,
502 }
503}
504
505// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
506func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
507 // Generate a content ID if needed
508 id := toolCall.ID
509 if id == "" {
510 // Create a deterministic ID based on the function name if no ID is provided
511 id = "tc_" + toolCall.Function.Name
512 }
513
514 return llm.Content{
515 ID: id,
516 Type: llm.ContentTypeToolUse,
517 ToolName: toolCall.Function.Name,
518 ToolInput: json.RawMessage(toolCall.Function.Arguments),
519 }
520}
521
522// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
523func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
524 return llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700525 Type: llm.ContentTypeToolResult,
526 ToolUseID: msg.ToolCallID,
527 ToolResult: []llm.Content{{
528 Type: llm.ContentTypeText,
529 Text: msg.Content,
530 }},
Josh Bleecher Snyder40c9da82025-07-24 21:08:20 +0000531 ToolError: false, // OpenAI doesn't specify errors explicitly; error information is parsed from content
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700532 }
533}
534
535// toLLMContents converts message content from OpenAI to []llm.Content.
536func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
537 var contents []llm.Content
538
539 // If this is a tool response, handle it separately
540 if msg.Role == "tool" && msg.ToolCallID != "" {
541 return []llm.Content{toToolResultLLMContent(msg)}
542 }
543
544 // If there's text content, add it
545 if msg.Content != "" {
546 contents = append(contents, toRawLLMContent(msg.Content))
547 }
548
549 // If there are tool calls, add them
550 for _, tc := range msg.ToolCalls {
551 contents = append(contents, toToolCallLLMContent(tc))
552 }
553
554 // If empty, add an empty text content
555 if len(contents) == 0 {
556 contents = append(contents, llm.Content{
557 Type: llm.ContentTypeText,
558 Text: "",
559 })
560 }
561
562 return contents
563}
564
// toLLMUsage converts usage information from OpenAI to llm.Usage, reading
// cached-token counts from PromptTokensDetails when present and the request
// cost from the response headers.
func (s *Service) toLLMUsage(au openai.Usage, headers http.Header) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens)
	var inc uint64
	if au.PromptTokensDetails != nil {
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): CacheCreationInputTokens is set to the TOTAL prompt
		// token count, not a cache-write count (OpenAI does not report one).
		// Confirm downstream accounting expects this.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
583
// toLLMResponse converts the OpenAI response to llm.Response.
// Only the first choice is used; any additional choices are ignored. When the
// response carries no choices, a content-free assistant response with usage
// data is returned.
func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
	if len(r.Choices) == 0 {
		return &llm.Response{
			ID:    r.ID,
			Model: r.Model,
			Role:  llm.MessageRoleAssistant,
			Usage: s.toLLMUsage(r.Usage, r.Header()),
		}
	}

	// Process the primary choice
	choice := r.Choices[0]

	return &llm.Response{
		ID:         r.ID,
		Model:      r.Model,
		Role:       toRoleFromString(choice.Message.Role),
		Content:    toLLMContents(choice.Message),
		StopReason: toStopReason(string(choice.FinishReason)),
		Usage:      s.toLLMUsage(r.Usage, r.Header()),
	}
}
613
614// toRoleFromString converts a role string to llm.MessageRole.
615func toRoleFromString(role string) llm.MessageRole {
616 if role == "tool" || role == "system" || role == "function" {
617 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
618 }
619 if mr, ok := toLLMRole[role]; ok {
620 return mr
621 }
622 return llm.MessageRoleUser // Default to user if unknown
623}
624
625// toStopReason converts a finish reason string to llm.StopReason.
626func toStopReason(reason string) llm.StopReason {
627 if sr, ok := toLLMStopReason[reason]; ok {
628 return sr
629 }
630 return llm.StopReasonStopSequence // Default
631}
632
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700633// TokenContextWindow returns the maximum token context window size for this service
634func (s *Service) TokenContextWindow() int {
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000635 // TODO: move TokenContextWindow information to Model struct
636
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700637 model := cmp.Or(s.Model, DefaultModel)
638
639 // OpenAI models generally have 128k context windows
640 // Some newer models have larger windows, but 128k is a safe default
641 switch model.ModelName {
642 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
643 return 200000 // 200k for newer GPT-4.1 models
644 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
645 return 128000 // 128k for GPT-4o models
646 case "o3-2025-04-16", "o3-mini-2025-04-16":
647 return 200000 // 200k for O3 models
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000648 case "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct":
649 return 256000 // 256k native context for Qwen3-Coder
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000650 case "qwen":
651 return 256000 // 256k native context for Qwen3-Coder
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700652 default:
653 // Default for unknown models
654 return 128000
655 }
656}
657
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700658// Do sends a request to OpenAI using the go-openai package.
659func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
660 // Configure the OpenAI client
661 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
662 model := cmp.Or(s.Model, DefaultModel)
663
664 // TODO: do this one during Service setup? maybe with a constructor instead?
665 config := openai.DefaultConfig(s.APIKey)
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000666 if modelURLOverride := cmp.Or(s.ModelURL, model.URL); modelURLOverride != "" {
667 config.BaseURL = modelURLOverride
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700668 }
669 if s.Org != "" {
670 config.OrgID = s.Org
671 }
672 config.HTTPClient = httpc
673
674 client := openai.NewClientWithConfig(config)
675
676 // Start with system messages if provided
677 var allMessages []openai.ChatCompletionMessage
678 if len(ir.System) > 0 {
679 sysMessages := fromLLMSystem(ir.System)
680 allMessages = append(allMessages, sysMessages...)
681 }
682
683 // Add regular and tool messages
684 for _, msg := range ir.Messages {
685 msgs := fromLLMMessage(msg)
686 allMessages = append(allMessages, msgs...)
687 }
688
689 // Convert tools
690 var tools []openai.Tool
691 for _, t := range ir.Tools {
692 tools = append(tools, fromLLMTool(t))
693 }
694
695 // Create the OpenAI request
696 req := openai.ChatCompletionRequest{
697 Model: model.ModelName,
698 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700699 Tools: tools,
700 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
701 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700702 if model.IsReasoningModel {
703 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
704 } else {
705 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
706 }
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700707 // Dump request if enabled
708 if s.DumpLLM {
709 if reqJSON, err := json.MarshalIndent(req, "", " "); err == nil {
710 // Construct the chat completions URL
711 baseURL := cmp.Or(model.URL, OpenAIURL)
712 url := baseURL + "/chat/completions"
713 if err := llm.DumpToFile("request", url, reqJSON); err != nil {
714 slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
715 }
716 }
717 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700718
719 // Retry mechanism
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000720 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700721
722 // retry loop
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000723 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700724 for attempts := 0; ; attempts++ {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000725 if attempts > 10 {
726 return nil, fmt.Errorf("openai request failed after %d attempts: %w", attempts, errs)
727 }
728 if attempts > 0 {
729 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
730 slog.WarnContext(ctx, "openai request sleep before retry", "sleep", sleep, "attempts", attempts)
731 time.Sleep(sleep)
732 }
733
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700734 resp, err := client.CreateChatCompletion(ctx, req)
735
736 // Handle successful response
737 if err == nil {
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700738 // Dump response if enabled
739 if s.DumpLLM {
740 if respJSON, jsonErr := json.MarshalIndent(resp, "", " "); jsonErr == nil {
741 if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
742 slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
743 }
744 }
745 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700746 return s.toLLMResponse(&resp), nil
747 }
748
749 // Handle errors
crawshaw5c861652025-07-29 16:34:52 +0000750 // Check for TLS "bad record MAC" errors and retry once
751 if strings.Contains(err.Error(), "tls: bad record MAC") && attempts == 0 {
752 slog.WarnContext(ctx, "tls bad record MAC error, retrying once", "error", err.Error())
753 errs = errors.Join(errs, fmt.Errorf("TLS error (attempt %d): %w", attempts+1, err))
754 continue
755 }
756
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700757 var apiErr *openai.APIError
758 if ok := errors.As(err, &apiErr); !ok {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000759 // Not an OpenAI API error, return immediately with accumulated errors
760 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700761 }
762
763 switch {
764 case apiErr.HTTPStatusCode >= 500:
765 // Server error, try again with backoff
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000766 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
767 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700768 continue
769
770 case apiErr.HTTPStatusCode == 429:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000771 // Rate limited, accumulate error and retry
772 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error())
773 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited): %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700774 continue
775
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000776 case apiErr.HTTPStatusCode >= 400 && apiErr.HTTPStatusCode < 500:
777 // Client error, probably unrecoverable
778 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
779 return nil, errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
780
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700781 default:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000782 // Other error, accumulate and retry
783 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
784 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
785 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700786 }
787 }
788}
Josh Bleecher Snyder994e9842025-07-30 20:26:47 -0700789
// UseSimplifiedPatch reports whether the configured model should use the
// simplified patch input schema (see Model.UseSimplifiedPatch).
func (s *Service) UseSimplifiedPatch() bool {
	return s.Model.UseSimplifiedPatch
}