blob: 8b64157e30fea81a4060784bd6bcc02bc2602584 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
12 "time"
13
14 "github.com/sashabaranov/go-openai"
15 "sketch.dev/llm"
16)
17
// Default request limits and the base URLs / API-key environment variables
// for the OpenAI-compatible providers this package knows about.
const (
	// DefaultMaxTokens is the completion-token limit used when Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for OpenAI-compatible chat completion endpoints.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
)
35
// Model describes one model available through an OpenAI-compatible API:
// how the user names it, how the provider names it, where to reach it,
// and what it costs.
type Model struct {
	UserName         string    // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string    // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string    // base URL of the provider's OpenAI-compatible endpoint
	Cost             ModelCost // pricing used to estimate per-request cost
	APIKeyEnv        string    // environment variable name for the API key
	IsReasoningModel bool      // whether this model is a reasoning model (e.g. O3, O4-mini)
}
44
// ModelCost holds per-token pricing for a model.
// All fields are expressed in cents per million tokens.
type ModelCost struct {
	Input       uint64 // in cents per million tokens
	CachedInput uint64 // in cents per million tokens
	Output      uint64 // in cents per million tokens
}
50
// Known models. DefaultModel is what Service uses when Model is the zero value.
// Prices come from the providers' published rate cards; some are rounded up
// (noted inline) because ModelCost stores integer cents per million tokens.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 200, CachedInput: 50, Output: 800},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 250, CachedInput: 125, Output: 1000},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 15, CachedInput: 8, Output: 60}, // 8 is actually 7.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 40, CachedInput: 10, Output: 160},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 10, CachedInput: 3, Output: 40}, // 3 is actually 2.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 1000, CachedInput: 250, Output: 4000},
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 110, CachedInput: 28, Output: 440}, // 28 is actually 27.5 GRRR round up for now oh well
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		Cost:      ModelCost{Input: 15, Output: 60},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// GRRRR. Really??
		// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
		// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
		// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
		// Whatever that means. Are we caching? I have no idea.
		// How do you always manage to be the annoying one, Google?
		// I'm not complicating things just for you.
		// NOTE(review): only the <= 200k-token tier is modeled here.
		Cost:      ModelCost{Input: 125, Output: 1000},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 125, Output: 125},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 27, Output: 85},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 22, Output: 88},
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 88, Output: 88},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 20, Output: 60},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// zero cost: runs locally, no API key needed (APIKeyEnv left empty)
		Cost: ModelCost{},
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		Cost:      ModelCost{Input: 40, Output: 200},
		APIKeyEnv: MistralAPIKeyEnv,
	}
)
215
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
// The zero value is usable: it targets DefaultModel with DefaultMaxTokens
// via http.DefaultClient.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}

// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)
227
// ModelsRegistry is a registry of all known models with their user-friendly names.
// Keep it in sync with the Model definitions above; ListModels and
// ModelByUserName consult this slice.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	FireworksLlama4Maverick,
	MistralMedium,
}
250
251// ListModels returns a list of all available models with their user-friendly names.
252func ListModels() []string {
253 var names []string
254 for _, model := range ModelsRegistry {
255 if model.UserName != "" {
256 names = append(names, model.UserName)
257 }
258 }
259 return names
260}
261
262// ModelByUserName returns a model by its user-friendly name.
263// Returns nil if no model with the given name is found.
264func ModelByUserName(name string) *Model {
265 for _, model := range ModelsRegistry {
266 if model.UserName == name {
267 return &model
268 }
269 }
270 return nil
271}
272
// Translation tables between llm-package enums and the strings used on the
// wire by the OpenAI chat completions API.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeToolUse:          "function", // OpenAI uses function instead of tool_call
		llm.ContentTypeToolResult:       "tool_result",
		llm.ContentTypeThinking:         "text", // Map thinking to text since OpenAI doesn't have thinking
		llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
	}
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
303
304// fromLLMContent converts llm.Content to the format expected by OpenAI.
305func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
306 switch c.Type {
307 case llm.ContentTypeText:
308 return c.Text, nil
309 case llm.ContentTypeToolUse:
310 // For OpenAI, tool use is sent as a null content with tool_calls in the message
311 return "", []openai.ToolCall{
312 {
313 Type: openai.ToolTypeFunction,
314 ID: c.ID, // Use the content ID if provided
315 Function: openai.FunctionCall{
316 Name: c.ToolName,
317 Arguments: string(c.ToolInput),
318 },
319 },
320 }
321 case llm.ContentTypeToolResult:
322 // Tool results in OpenAI are sent as a separate message with tool_call_id
323 return c.ToolResult, nil
324 default:
325 // For thinking or other types, convert to text
326 return c.Text, nil
327 }
328}
329
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message may expand into several OpenAI messages: each tool result
// must be its own message with role="tool" and a tool_call_id, while the
// remaining content (text plus tool calls) is collapsed into a single
// message with the original role.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	var messages []openai.ChatCompletionMessage

	// Partition the content: tool results vs. everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, each as its own role="tool" message.
	for _, tr := range toolResults {
		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Then emit the remaining content as one message, if any.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Accumulate tool calls and newline-joined text separately.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
388
389// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
390func fromLLMToolChoice(tc *llm.ToolChoice) any {
391 if tc == nil {
392 return nil
393 }
394
395 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
396 return openai.ToolChoice{
397 Type: openai.ToolTypeFunction,
398 Function: openai.ToolFunction{
399 Name: tc.Name,
400 },
401 }
402 }
403
404 // For non-specific tool choice, just use the string
405 return fromLLMToolChoiceType[tc.Type]
406}
407
408// fromLLMTool converts llm.Tool to the format expected by OpenAI.
409func fromLLMTool(t *llm.Tool) openai.Tool {
410 return openai.Tool{
411 Type: openai.ToolTypeFunction,
412 Function: &openai.FunctionDefinition{
413 Name: t.Name,
414 Description: t.Description,
415 Parameters: t.InputSchema,
416 },
417 }
418}
419
420// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
421func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
422 if len(systemContent) == 0 {
423 return nil
424 }
425
426 // Combine all system content into a single system message
427 var systemText string
428 for i, content := range systemContent {
429 if i > 0 && systemText != "" && content.Text != "" {
430 systemText += "\n"
431 }
432 systemText += content.Text
433 }
434
435 if systemText == "" {
436 return nil
437 }
438
439 return []openai.ChatCompletionMessage{
440 {
441 Role: "system",
442 Content: systemText,
443 },
444 }
445}
446
447// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
448func toRawLLMContent(content string) llm.Content {
449 return llm.Content{
450 Type: llm.ContentTypeText,
451 Text: content,
452 }
453}
454
455// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
456func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
457 // Generate a content ID if needed
458 id := toolCall.ID
459 if id == "" {
460 // Create a deterministic ID based on the function name if no ID is provided
461 id = "tc_" + toolCall.Function.Name
462 }
463
464 return llm.Content{
465 ID: id,
466 Type: llm.ContentTypeToolUse,
467 ToolName: toolCall.Function.Name,
468 ToolInput: json.RawMessage(toolCall.Function.Arguments),
469 }
470}
471
472// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
473func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
474 return llm.Content{
475 Type: llm.ContentTypeToolResult,
476 ToolUseID: msg.ToolCallID,
477 ToolResult: msg.Content,
478 ToolError: false, // OpenAI doesn't specify errors explicitly
479 }
480}
481
482// toLLMContents converts message content from OpenAI to []llm.Content.
483func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
484 var contents []llm.Content
485
486 // If this is a tool response, handle it separately
487 if msg.Role == "tool" && msg.ToolCallID != "" {
488 return []llm.Content{toToolResultLLMContent(msg)}
489 }
490
491 // If there's text content, add it
492 if msg.Content != "" {
493 contents = append(contents, toRawLLMContent(msg.Content))
494 }
495
496 // If there are tool calls, add them
497 for _, tc := range msg.ToolCalls {
498 contents = append(contents, toToolCallLLMContent(tc))
499 }
500
501 // If empty, add an empty text content
502 if len(contents) == 0 {
503 contents = append(contents, llm.Content{
504 Type: llm.ContentTypeText,
505 Text: "",
506 })
507 }
508
509 return contents
510}
511
512// toLLMUsage converts usage information from OpenAI to llm.Usage.
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700513func (s *Service) toLLMUsage(au openai.Usage) llm.Usage {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700514 // fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
515 in := uint64(au.PromptTokens)
516 var inc uint64
517 if au.PromptTokensDetails != nil {
518 inc = uint64(au.PromptTokensDetails.CachedTokens)
519 }
520 out := uint64(au.CompletionTokens)
521 u := llm.Usage{
522 InputTokens: in,
523 CacheReadInputTokens: inc,
524 CacheCreationInputTokens: in,
525 OutputTokens: out,
526 }
527 u.CostUSD = s.calculateCostFromTokens(u)
528 return u
529}
530
531// toLLMResponse converts the OpenAI response to llm.Response.
532func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
533 // fmt.Printf("Raw response\n")
534 // enc := json.NewEncoder(os.Stdout)
535 // enc.SetIndent("", " ")
536 // enc.Encode(r)
537 // fmt.Printf("\n")
538
539 if len(r.Choices) == 0 {
540 return &llm.Response{
541 ID: r.ID,
542 Model: r.Model,
543 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700544 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700545 }
546 }
547
548 // Process the primary choice
549 choice := r.Choices[0]
550
551 return &llm.Response{
552 ID: r.ID,
553 Model: r.Model,
554 Role: toRoleFromString(choice.Message.Role),
555 Content: toLLMContents(choice.Message),
556 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700557 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700558 }
559}
560
561// toRoleFromString converts a role string to llm.MessageRole.
562func toRoleFromString(role string) llm.MessageRole {
563 if role == "tool" || role == "system" || role == "function" {
564 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
565 }
566 if mr, ok := toLLMRole[role]; ok {
567 return mr
568 }
569 return llm.MessageRoleUser // Default to user if unknown
570}
571
572// toStopReason converts a finish reason string to llm.StopReason.
573func toStopReason(reason string) llm.StopReason {
574 if sr, ok := toLLMStopReason[reason]; ok {
575 return sr
576 }
577 return llm.StopReasonStopSequence // Default
578}
579
// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
// ModelCost fields are in cents per million tokens, so each product below is
// in "mega-cents" until divided by 1e6 (to cents) and then by 100 (to dollars).
func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
	cost := s.Model.Cost

	// TODO: check this for correctness, i am skeptical
	// NOTE(review): this bills CacheCreationInputTokens at the full input rate
	// and CacheReadInputTokens at the cached rate. Confirm that the caller
	// populates CacheCreationInputTokens with only the *uncached* portion of
	// the prompt; if it holds the full prompt count, cached tokens are
	// effectively charged twice.
	// Calculate cost in cents
	megaCents := u.CacheCreationInputTokens*cost.Input +
		u.CacheReadInputTokens*cost.CachedInput +
		u.OutputTokens*cost.Output

	cents := float64(megaCents) / 1_000_000
	// Convert to dollars
	dollars := cents / 100.0
	// fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
	return dollars
}
596
597// Do sends a request to OpenAI using the go-openai package.
598func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
599 // Configure the OpenAI client
600 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
601 model := cmp.Or(s.Model, DefaultModel)
602
603 // TODO: do this one during Service setup? maybe with a constructor instead?
604 config := openai.DefaultConfig(s.APIKey)
605 if model.URL != "" {
606 config.BaseURL = model.URL
607 }
608 if s.Org != "" {
609 config.OrgID = s.Org
610 }
611 config.HTTPClient = httpc
612
613 client := openai.NewClientWithConfig(config)
614
615 // Start with system messages if provided
616 var allMessages []openai.ChatCompletionMessage
617 if len(ir.System) > 0 {
618 sysMessages := fromLLMSystem(ir.System)
619 allMessages = append(allMessages, sysMessages...)
620 }
621
622 // Add regular and tool messages
623 for _, msg := range ir.Messages {
624 msgs := fromLLMMessage(msg)
625 allMessages = append(allMessages, msgs...)
626 }
627
628 // Convert tools
629 var tools []openai.Tool
630 for _, t := range ir.Tools {
631 tools = append(tools, fromLLMTool(t))
632 }
633
634 // Create the OpenAI request
635 req := openai.ChatCompletionRequest{
636 Model: model.ModelName,
637 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700638 Tools: tools,
639 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
640 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700641 if model.IsReasoningModel {
642 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
643 } else {
644 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
645 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700646 // fmt.Printf("Sending request to OpenAI\n")
647 // enc := json.NewEncoder(os.Stdout)
648 // enc.SetIndent("", " ")
649 // enc.Encode(req)
650 // fmt.Printf("\n")
651
652 // Retry mechanism
653 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
654
655 // retry loop
656 for attempts := 0; ; attempts++ {
657 resp, err := client.CreateChatCompletion(ctx, req)
658
659 // Handle successful response
660 if err == nil {
661 return s.toLLMResponse(&resp), nil
662 }
663
664 // Handle errors
665 var apiErr *openai.APIError
666 if ok := errors.As(err, &apiErr); !ok {
667 // Not an OpenAI API error, return immediately
668 return nil, err
669 }
670
671 switch {
672 case apiErr.HTTPStatusCode >= 500:
673 // Server error, try again with backoff
674 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
675 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
676 time.Sleep(sleep)
677 continue
678
679 case apiErr.HTTPStatusCode == 429:
680 // Rate limited, back off longer
681 sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
682 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
683 time.Sleep(sleep)
684 continue
685
686 default:
687 // Other error, return immediately
688 return nil, fmt.Errorf("OpenAI API error: %w", err)
689 }
690 }
691}