blob: c561095c7593f4ae9327c0850c31c5ccb78fd62a [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070012 "strings"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070013 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "sketch.dev/llm"
17)
18
const (
	// DefaultMaxTokens is the completion-token budget used when
	// Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for the OpenAI-compatible chat-completion endpoints
	// this package knows how to talk to.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"
	MoonshotURL  = "https://api.moonshot.ai/v1"

	// Environment variable names for API keys, one per provider.
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
	MoonshotAPIKeyEnv  = "MOONSHOT_API_KEY"
)
38
// Model identifies a chat-completion model together with the provider
// endpoint that serves it and the environment variable holding its API key.
type Model struct {
	UserName         string // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string // base URL of the provider's OpenAI-compatible API; may be empty when resolved elsewhere (see Qwen)
	APIKeyEnv        string // environment variable name for the API key
	IsReasoningModel bool   // whether this model is a reasoning model (e.g. O3, O4-mini)
}
46
// Known models, keyed below in ModelsRegistry by their UserName.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// GRRRR. Really??
		// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
		// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
		// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
		// Whatever that means. Are we caching? I have no idea.
		// How do you always manage to be the annoying one, Google?
		// I'm not complicating things just for you.
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherDeepseekR1 = Model{
		UserName:  "together-deepseek-r1",
		ModelName: "deepseek-ai/DeepSeek-R1",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	// LlamaCPP talks to a locally running llama.cpp server; no API key needed.
	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// NOTE(review): ModelName "moonshot-v1-auto" lets Moonshot auto-select a
	// model; confirm this actually maps to Kimi K2 as the UserName suggests.
	MoonshotKimiK2 = Model{
		UserName:  "moonshot-kimi-k2",
		ModelName: "moonshot-v1-auto",
		URL:       MoonshotURL,
		APIKeyEnv: MoonshotAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	DevstralSmall = Model{
		UserName:  "devstral-small",
		ModelName: "devstral-small-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	Qwen3CoderFireworks = Model{
		UserName:  "qwen3-coder-fireworks",
		ModelName: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// Qwen is a skaband-specific model name for Qwen3-Coder
	// Provider details (URL and APIKeyEnv) are handled by skaband
	Qwen = Model{
		UserName:  "qwen",
		ModelName: "qwen", // skaband will map this to the actual provider model
	}
)
226
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	ModelURL  string       // optional, overrides Model.URL
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
}
238
// Compile-time check that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)

// ModelsRegistry is a registry of all known models with their user-friendly names.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherDeepseekR1,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	MoonshotKimiK2,
	FireworksLlama4Maverick,
	MistralMedium,
	DevstralSmall,
	Qwen3CoderFireworks,
	Qwen,
}
268
269// ListModels returns a list of all available models with their user-friendly names.
270func ListModels() []string {
271 var names []string
272 for _, model := range ModelsRegistry {
273 if model.UserName != "" {
274 names = append(names, model.UserName)
275 }
276 }
277 return names
278}
279
280// ModelByUserName returns a model by its user-friendly name.
281// Returns nil if no model with the given name is found.
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000282func ModelByUserName(name string) Model {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700283 for _, model := range ModelsRegistry {
284 if model.UserName == name {
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000285 return model
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700286 }
287 }
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000288 return Model{}
289}
290
291func (m Model) IsZero() bool {
292 return m == Model{}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700293}
294
// Lookup tables translating between llm package enums and the strings used
// by the OpenAI-compatible wire format.
var (
	// fromLLMRole maps llm message roles to OpenAI role strings.
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	// fromLLMToolChoiceType maps llm tool-choice types to OpenAI strings.
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	// toLLMRole is the inverse of fromLLMRole.
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	// toLLMStopReason maps OpenAI finish reasons to llm stop reasons.
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
318
319// fromLLMContent converts llm.Content to the format expected by OpenAI.
320func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
321 switch c.Type {
322 case llm.ContentTypeText:
323 return c.Text, nil
324 case llm.ContentTypeToolUse:
325 // For OpenAI, tool use is sent as a null content with tool_calls in the message
326 return "", []openai.ToolCall{
327 {
328 Type: openai.ToolTypeFunction,
329 ID: c.ID, // Use the content ID if provided
330 Function: openai.FunctionCall{
331 Name: c.ToolName,
332 Arguments: string(c.ToolInput),
333 },
334 },
335 }
336 case llm.ContentTypeToolResult:
337 // Tool results in OpenAI are sent as a separate message with tool_call_id
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700338 // OpenAI doesn't support multiple content items or images in tool results
339 // Combine all text content into a single string
340 var resultText string
341 if len(c.ToolResult) > 0 {
342 // Collect all text from content objects
343 texts := make([]string, 0, len(c.ToolResult))
344 for _, result := range c.ToolResult {
345 if result.Text != "" {
346 texts = append(texts, result.Text)
347 }
348 }
349 resultText = strings.Join(texts, "\n")
350 }
351 return resultText, nil
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700352 default:
353 // For thinking or other types, convert to text
354 return c.Text, nil
355 }
356}
357
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message can expand to several OpenAI messages: each tool result
// becomes its own message with role="tool", and any remaining (regular)
// content becomes one final message carrying text and/or tool calls.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	// For OpenAI, we need to handle tool results differently than regular messages
	// Each tool result becomes its own message with role="tool"

	var messages []openai.ChatCompletionMessage

	// Partition the content into tool results and everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Process tool results first, as separate role="tool" messages.
	for _, tr := range toolResults {
		// Convert the tool-result array to a single string for OpenAI:
		// collect all non-blank text from the content objects.
		var texts []string
		for _, result := range tr.ToolResult {
			if strings.TrimSpace(result.Text) != "" {
				texts = append(texts, result.Text)
			}
		}
		toolResultContent := strings.Join(texts, "\n")

		// OpenAI doesn't have an explicit error field for tool results, so add it directly to the content.
		if tr.ToolError {
			if toolResultContent != "" {
				toolResultContent = "error: " + toolResultContent
			} else {
				toolResultContent = "error: tool execution failed"
			}
		}

		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Process regular content second
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// For assistant messages that contain tool calls
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text parts with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
435
436// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
437func fromLLMToolChoice(tc *llm.ToolChoice) any {
438 if tc == nil {
439 return nil
440 }
441
442 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
443 return openai.ToolChoice{
444 Type: openai.ToolTypeFunction,
445 Function: openai.ToolFunction{
446 Name: tc.Name,
447 },
448 }
449 }
450
451 // For non-specific tool choice, just use the string
452 return fromLLMToolChoiceType[tc.Type]
453}
454
455// fromLLMTool converts llm.Tool to the format expected by OpenAI.
456func fromLLMTool(t *llm.Tool) openai.Tool {
457 return openai.Tool{
458 Type: openai.ToolTypeFunction,
459 Function: &openai.FunctionDefinition{
460 Name: t.Name,
461 Description: t.Description,
462 Parameters: t.InputSchema,
463 },
464 }
465}
466
467// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
468func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
469 if len(systemContent) == 0 {
470 return nil
471 }
472
473 // Combine all system content into a single system message
474 var systemText string
475 for i, content := range systemContent {
476 if i > 0 && systemText != "" && content.Text != "" {
477 systemText += "\n"
478 }
479 systemText += content.Text
480 }
481
482 if systemText == "" {
483 return nil
484 }
485
486 return []openai.ChatCompletionMessage{
487 {
488 Role: "system",
489 Content: systemText,
490 },
491 }
492}
493
494// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
495func toRawLLMContent(content string) llm.Content {
496 return llm.Content{
497 Type: llm.ContentTypeText,
498 Text: content,
499 }
500}
501
502// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
503func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
504 // Generate a content ID if needed
505 id := toolCall.ID
506 if id == "" {
507 // Create a deterministic ID based on the function name if no ID is provided
508 id = "tc_" + toolCall.Function.Name
509 }
510
511 return llm.Content{
512 ID: id,
513 Type: llm.ContentTypeToolUse,
514 ToolName: toolCall.Function.Name,
515 ToolInput: json.RawMessage(toolCall.Function.Arguments),
516 }
517}
518
519// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
520func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
521 return llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700522 Type: llm.ContentTypeToolResult,
523 ToolUseID: msg.ToolCallID,
524 ToolResult: []llm.Content{{
525 Type: llm.ContentTypeText,
526 Text: msg.Content,
527 }},
Josh Bleecher Snyder40c9da82025-07-24 21:08:20 +0000528 ToolError: false, // OpenAI doesn't specify errors explicitly; error information is parsed from content
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700529 }
530}
531
532// toLLMContents converts message content from OpenAI to []llm.Content.
533func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
534 var contents []llm.Content
535
536 // If this is a tool response, handle it separately
537 if msg.Role == "tool" && msg.ToolCallID != "" {
538 return []llm.Content{toToolResultLLMContent(msg)}
539 }
540
541 // If there's text content, add it
542 if msg.Content != "" {
543 contents = append(contents, toRawLLMContent(msg.Content))
544 }
545
546 // If there are tool calls, add them
547 for _, tc := range msg.ToolCalls {
548 contents = append(contents, toToolCallLLMContent(tc))
549 }
550
551 // If empty, add an empty text content
552 if len(contents) == 0 {
553 contents = append(contents, llm.Content{
554 Type: llm.ContentTypeText,
555 Text: "",
556 })
557 }
558
559 return contents
560}
561
// toLLMUsage converts usage information from OpenAI to llm.Usage,
// attaching the cost derived from the response headers.
func (s *Service) toLLMUsage(au openai.Usage, headers http.Header) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens)
	// Cached prompt tokens, when the provider reports them.
	var inc uint64
	if au.PromptTokensDetails != nil {
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): this charges the entire prompt as cache-creation
		// tokens; if llm.Usage follows Anthropic-style accounting, this
		// likely should be 0 (or in-inc) for OpenAI's automatic caching —
		// confirm against llm.Usage's cost model.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
580
581// toLLMResponse converts the OpenAI response to llm.Response.
582func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
583 // fmt.Printf("Raw response\n")
584 // enc := json.NewEncoder(os.Stdout)
585 // enc.SetIndent("", " ")
586 // enc.Encode(r)
587 // fmt.Printf("\n")
588
589 if len(r.Choices) == 0 {
590 return &llm.Response{
591 ID: r.ID,
592 Model: r.Model,
593 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700594 Usage: s.toLLMUsage(r.Usage, r.Header()),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700595 }
596 }
597
598 // Process the primary choice
599 choice := r.Choices[0]
600
601 return &llm.Response{
602 ID: r.ID,
603 Model: r.Model,
604 Role: toRoleFromString(choice.Message.Role),
605 Content: toLLMContents(choice.Message),
606 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700607 Usage: s.toLLMUsage(r.Usage, r.Header()),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700608 }
609}
610
611// toRoleFromString converts a role string to llm.MessageRole.
612func toRoleFromString(role string) llm.MessageRole {
613 if role == "tool" || role == "system" || role == "function" {
614 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
615 }
616 if mr, ok := toLLMRole[role]; ok {
617 return mr
618 }
619 return llm.MessageRoleUser // Default to user if unknown
620}
621
622// toStopReason converts a finish reason string to llm.StopReason.
623func toStopReason(reason string) llm.StopReason {
624 if sr, ok := toLLMStopReason[reason]; ok {
625 return sr
626 }
627 return llm.StopReasonStopSequence // Default
628}
629
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700630// TokenContextWindow returns the maximum token context window size for this service
631func (s *Service) TokenContextWindow() int {
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000632 // TODO: move TokenContextWindow information to Model struct
633
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700634 model := cmp.Or(s.Model, DefaultModel)
635
636 // OpenAI models generally have 128k context windows
637 // Some newer models have larger windows, but 128k is a safe default
638 switch model.ModelName {
639 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
640 return 200000 // 200k for newer GPT-4.1 models
641 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
642 return 128000 // 128k for GPT-4o models
643 case "o3-2025-04-16", "o3-mini-2025-04-16":
644 return 200000 // 200k for O3 models
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000645 case "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct":
646 return 256000 // 256k native context for Qwen3-Coder
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000647 case "qwen":
648 return 256000 // 256k native context for Qwen3-Coder
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700649 default:
650 // Default for unknown models
651 return 128000
652 }
653}
654
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700655// Do sends a request to OpenAI using the go-openai package.
656func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
657 // Configure the OpenAI client
658 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
659 model := cmp.Or(s.Model, DefaultModel)
660
661 // TODO: do this one during Service setup? maybe with a constructor instead?
662 config := openai.DefaultConfig(s.APIKey)
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000663 if modelURLOverride := cmp.Or(s.ModelURL, model.URL); modelURLOverride != "" {
664 config.BaseURL = modelURLOverride
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700665 }
666 if s.Org != "" {
667 config.OrgID = s.Org
668 }
669 config.HTTPClient = httpc
670
671 client := openai.NewClientWithConfig(config)
672
673 // Start with system messages if provided
674 var allMessages []openai.ChatCompletionMessage
675 if len(ir.System) > 0 {
676 sysMessages := fromLLMSystem(ir.System)
677 allMessages = append(allMessages, sysMessages...)
678 }
679
680 // Add regular and tool messages
681 for _, msg := range ir.Messages {
682 msgs := fromLLMMessage(msg)
683 allMessages = append(allMessages, msgs...)
684 }
685
686 // Convert tools
687 var tools []openai.Tool
688 for _, t := range ir.Tools {
689 tools = append(tools, fromLLMTool(t))
690 }
691
692 // Create the OpenAI request
693 req := openai.ChatCompletionRequest{
694 Model: model.ModelName,
695 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700696 Tools: tools,
697 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
698 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700699 if model.IsReasoningModel {
700 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
701 } else {
702 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
703 }
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700704 // Dump request if enabled
705 if s.DumpLLM {
706 if reqJSON, err := json.MarshalIndent(req, "", " "); err == nil {
707 // Construct the chat completions URL
708 baseURL := cmp.Or(model.URL, OpenAIURL)
709 url := baseURL + "/chat/completions"
710 if err := llm.DumpToFile("request", url, reqJSON); err != nil {
711 slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
712 }
713 }
714 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700715
716 // Retry mechanism
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000717 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700718
719 // retry loop
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000720 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700721 for attempts := 0; ; attempts++ {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000722 if attempts > 10 {
723 return nil, fmt.Errorf("openai request failed after %d attempts: %w", attempts, errs)
724 }
725 if attempts > 0 {
726 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
727 slog.WarnContext(ctx, "openai request sleep before retry", "sleep", sleep, "attempts", attempts)
728 time.Sleep(sleep)
729 }
730
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700731 resp, err := client.CreateChatCompletion(ctx, req)
732
733 // Handle successful response
734 if err == nil {
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700735 // Dump response if enabled
736 if s.DumpLLM {
737 if respJSON, jsonErr := json.MarshalIndent(resp, "", " "); jsonErr == nil {
738 if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
739 slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
740 }
741 }
742 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700743 return s.toLLMResponse(&resp), nil
744 }
745
746 // Handle errors
crawshaw5c861652025-07-29 16:34:52 +0000747 // Check for TLS "bad record MAC" errors and retry once
748 if strings.Contains(err.Error(), "tls: bad record MAC") && attempts == 0 {
749 slog.WarnContext(ctx, "tls bad record MAC error, retrying once", "error", err.Error())
750 errs = errors.Join(errs, fmt.Errorf("TLS error (attempt %d): %w", attempts+1, err))
751 continue
752 }
753
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700754 var apiErr *openai.APIError
755 if ok := errors.As(err, &apiErr); !ok {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000756 // Not an OpenAI API error, return immediately with accumulated errors
757 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700758 }
759
760 switch {
761 case apiErr.HTTPStatusCode >= 500:
762 // Server error, try again with backoff
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000763 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
764 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700765 continue
766
767 case apiErr.HTTPStatusCode == 429:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000768 // Rate limited, accumulate error and retry
769 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error())
770 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited): %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700771 continue
772
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000773 case apiErr.HTTPStatusCode >= 400 && apiErr.HTTPStatusCode < 500:
774 // Client error, probably unrecoverable
775 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
776 return nil, errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
777
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700778 default:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000779 // Other error, accumulate and retry
780 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
781 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
782 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700783 }
784 }
785}