blob: 4b046b008efec70211fde035a3713af1de390471 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
12 "time"
13
14 "github.com/sashabaranov/go-openai"
15 "sketch.dev/llm"
16)
17
const (
	// DefaultMaxTokens is the completion token budget used when
	// Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs of the supported OpenAI-compatible chat completion endpoints.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
)
35
// Model describes a chat completion model offered by an OpenAI-compatible
// provider: how users refer to it, how the provider names it, where to
// reach it, what it costs, and which env var holds its API key.
type Model struct {
	UserName         string    // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string    // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string    // base URL of the provider's OpenAI-compatible API
	Cost             ModelCost // token pricing used to estimate request cost
	APIKeyEnv        string    // environment variable name for the API key
	IsReasoningModel bool      // whether this model is a reasoning model (e.g. O3, O4-mini)
}
44
// ModelCost holds a model's pricing. All values are expressed in cents
// per million tokens so they stay integral.
type ModelCost struct {
	Input       uint64 // in cents per million tokens
	CachedInput uint64 // in cents per million tokens (discounted rate for cached prompt tokens)
	Output      uint64 // in cents per million tokens
}
50
// All known models. Costs are in cents per million tokens; some values
// are rounded up where providers quote fractional cents.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 200, CachedInput: 50, Output: 800},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 250, CachedInput: 125, Output: 1000},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 15, CachedInput: 8, Output: 60}, // CachedInput is actually 7.5; rounded up
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 40, CachedInput: 10, Output: 160},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 10, CachedInput: 3, Output: 40}, // CachedInput is actually 2.5; rounded up
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 1000, CachedInput: 250, Output: 4000},
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 110, CachedInput: 28, Output: 440}, // CachedInput is actually 27.5; rounded up
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		Cost:      ModelCost{Input: 15, Output: 60},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// Gemini 2.5 Pro pricing is tiered by prompt size:
		//   Input:  $1.25 for prompts <= 200k tokens, $2.50 above.
		//   Output: $10.00 for prompts <= 200k tokens, $15.00 above.
		//   Caching: $0.31 (<= 200k) / $0.625 (> 200k), plus $4.50 per
		//   million tokens per hour of cache storage.
		// We deliberately model only the small-prompt tier and ignore
		// caching rather than complicate the cost model.
		Cost:      ModelCost{Input: 125, Output: 1000},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 125, Output: 125},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 27, Output: 85},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 22, Output: 88},
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 88, Output: 88},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 20, Output: 60},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// zero cost: runs locally, no API key required
		Cost: ModelCost{},
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		Cost:      ModelCost{Input: 40, Output: 200},
		APIKeyEnv: MistralAPIKeyEnv,
	}
)
215
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}
225
226var _ llm.Service = (*Service)(nil)
227
Philip Zeyliger022b3632025-05-10 06:14:21 -0700228func (s *Service) ModelName() string {
229 return s.Model.UserName
230}
231
// ModelsRegistry is a registry of all known models with their user-friendly names.
// ListModels and ModelByUserName iterate over this slice.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	FireworksLlama4Maverick,
	MistralMedium,
}
254
255// ListModels returns a list of all available models with their user-friendly names.
256func ListModels() []string {
257 var names []string
258 for _, model := range ModelsRegistry {
259 if model.UserName != "" {
260 names = append(names, model.UserName)
261 }
262 }
263 return names
264}
265
266// ModelByUserName returns a model by its user-friendly name.
267// Returns nil if no model with the given name is found.
268func ModelByUserName(name string) *Model {
269 for _, model := range ModelsRegistry {
270 if model.UserName == name {
271 return &model
272 }
273 }
274 return nil
275}
276
277var (
278 fromLLMRole = map[llm.MessageRole]string{
279 llm.MessageRoleAssistant: "assistant",
280 llm.MessageRoleUser: "user",
281 }
282 fromLLMContentType = map[llm.ContentType]string{
283 llm.ContentTypeText: "text",
284 llm.ContentTypeToolUse: "function", // OpenAI uses function instead of tool_call
285 llm.ContentTypeToolResult: "tool_result",
286 llm.ContentTypeThinking: "text", // Map thinking to text since OpenAI doesn't have thinking
287 llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
288 }
289 fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
290 llm.ToolChoiceTypeAuto: "auto",
291 llm.ToolChoiceTypeAny: "any",
292 llm.ToolChoiceTypeNone: "none",
293 llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
294 }
295 toLLMRole = map[string]llm.MessageRole{
296 "assistant": llm.MessageRoleAssistant,
297 "user": llm.MessageRoleUser,
298 }
299 toLLMStopReason = map[string]llm.StopReason{
300 "stop": llm.StopReasonStopSequence,
301 "length": llm.StopReasonMaxTokens,
302 "tool_calls": llm.StopReasonToolUse,
303 "function_call": llm.StopReasonToolUse, // Map both to ToolUse
304 "content_filter": llm.StopReasonStopSequence, // No direct equivalent
305 }
306)
307
308// fromLLMContent converts llm.Content to the format expected by OpenAI.
309func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
310 switch c.Type {
311 case llm.ContentTypeText:
312 return c.Text, nil
313 case llm.ContentTypeToolUse:
314 // For OpenAI, tool use is sent as a null content with tool_calls in the message
315 return "", []openai.ToolCall{
316 {
317 Type: openai.ToolTypeFunction,
318 ID: c.ID, // Use the content ID if provided
319 Function: openai.FunctionCall{
320 Name: c.ToolName,
321 Arguments: string(c.ToolInput),
322 },
323 },
324 }
325 case llm.ContentTypeToolResult:
326 // Tool results in OpenAI are sent as a separate message with tool_call_id
327 return c.ToolResult, nil
328 default:
329 // For thinking or other types, convert to text
330 return c.Text, nil
331 }
332}
333
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
// One llm.Message may expand into several OpenAI messages: each tool result
// becomes its own message with role="tool", followed by at most one message
// holding the remaining text content and tool calls.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	// For OpenAI, we need to handle tool results differently than regular messages.
	// Each tool result becomes its own message with role="tool".

	var messages []openai.ChatCompletionMessage

	// Partition the content into tool results and everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, one message per result.
	for _, tr := range toolResults {
		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Process regular content second.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Assistant messages may carry tool calls alongside text.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text fragments with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
392
393// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
394func fromLLMToolChoice(tc *llm.ToolChoice) any {
395 if tc == nil {
396 return nil
397 }
398
399 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
400 return openai.ToolChoice{
401 Type: openai.ToolTypeFunction,
402 Function: openai.ToolFunction{
403 Name: tc.Name,
404 },
405 }
406 }
407
408 // For non-specific tool choice, just use the string
409 return fromLLMToolChoiceType[tc.Type]
410}
411
412// fromLLMTool converts llm.Tool to the format expected by OpenAI.
413func fromLLMTool(t *llm.Tool) openai.Tool {
414 return openai.Tool{
415 Type: openai.ToolTypeFunction,
416 Function: &openai.FunctionDefinition{
417 Name: t.Name,
418 Description: t.Description,
419 Parameters: t.InputSchema,
420 },
421 }
422}
423
424// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
425func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
426 if len(systemContent) == 0 {
427 return nil
428 }
429
430 // Combine all system content into a single system message
431 var systemText string
432 for i, content := range systemContent {
433 if i > 0 && systemText != "" && content.Text != "" {
434 systemText += "\n"
435 }
436 systemText += content.Text
437 }
438
439 if systemText == "" {
440 return nil
441 }
442
443 return []openai.ChatCompletionMessage{
444 {
445 Role: "system",
446 Content: systemText,
447 },
448 }
449}
450
451// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
452func toRawLLMContent(content string) llm.Content {
453 return llm.Content{
454 Type: llm.ContentTypeText,
455 Text: content,
456 }
457}
458
459// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
460func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
461 // Generate a content ID if needed
462 id := toolCall.ID
463 if id == "" {
464 // Create a deterministic ID based on the function name if no ID is provided
465 id = "tc_" + toolCall.Function.Name
466 }
467
468 return llm.Content{
469 ID: id,
470 Type: llm.ContentTypeToolUse,
471 ToolName: toolCall.Function.Name,
472 ToolInput: json.RawMessage(toolCall.Function.Arguments),
473 }
474}
475
476// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
477func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
478 return llm.Content{
479 Type: llm.ContentTypeToolResult,
480 ToolUseID: msg.ToolCallID,
481 ToolResult: msg.Content,
482 ToolError: false, // OpenAI doesn't specify errors explicitly
483 }
484}
485
486// toLLMContents converts message content from OpenAI to []llm.Content.
487func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
488 var contents []llm.Content
489
490 // If this is a tool response, handle it separately
491 if msg.Role == "tool" && msg.ToolCallID != "" {
492 return []llm.Content{toToolResultLLMContent(msg)}
493 }
494
495 // If there's text content, add it
496 if msg.Content != "" {
497 contents = append(contents, toRawLLMContent(msg.Content))
498 }
499
500 // If there are tool calls, add them
501 for _, tc := range msg.ToolCalls {
502 contents = append(contents, toToolCallLLMContent(tc))
503 }
504
505 // If empty, add an empty text content
506 if len(contents) == 0 {
507 contents = append(contents, llm.Content{
508 Type: llm.ContentTypeText,
509 Text: "",
510 })
511 }
512
513 return contents
514}
515
516// toLLMUsage converts usage information from OpenAI to llm.Usage.
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700517func (s *Service) toLLMUsage(au openai.Usage) llm.Usage {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700518 // fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
519 in := uint64(au.PromptTokens)
520 var inc uint64
521 if au.PromptTokensDetails != nil {
522 inc = uint64(au.PromptTokensDetails.CachedTokens)
523 }
524 out := uint64(au.CompletionTokens)
525 u := llm.Usage{
526 InputTokens: in,
527 CacheReadInputTokens: inc,
528 CacheCreationInputTokens: in,
529 OutputTokens: out,
530 }
531 u.CostUSD = s.calculateCostFromTokens(u)
532 return u
533}
534
535// toLLMResponse converts the OpenAI response to llm.Response.
536func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
537 // fmt.Printf("Raw response\n")
538 // enc := json.NewEncoder(os.Stdout)
539 // enc.SetIndent("", " ")
540 // enc.Encode(r)
541 // fmt.Printf("\n")
542
543 if len(r.Choices) == 0 {
544 return &llm.Response{
545 ID: r.ID,
546 Model: r.Model,
547 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700548 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700549 }
550 }
551
552 // Process the primary choice
553 choice := r.Choices[0]
554
555 return &llm.Response{
556 ID: r.ID,
557 Model: r.Model,
558 Role: toRoleFromString(choice.Message.Role),
559 Content: toLLMContents(choice.Message),
560 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700561 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700562 }
563}
564
565// toRoleFromString converts a role string to llm.MessageRole.
566func toRoleFromString(role string) llm.MessageRole {
567 if role == "tool" || role == "system" || role == "function" {
568 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
569 }
570 if mr, ok := toLLMRole[role]; ok {
571 return mr
572 }
573 return llm.MessageRoleUser // Default to user if unknown
574}
575
576// toStopReason converts a finish reason string to llm.StopReason.
577func toStopReason(reason string) llm.StopReason {
578 if sr, ok := toLLMStopReason[reason]; ok {
579 return sr
580 }
581 return llm.StopReasonStopSequence // Default
582}
583
// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
//
// ModelCost values are cents per million tokens, so the sum below is in
// "mega-cents" and is divided by 1e6 (to cents) and then 100 (to dollars).
// Note the input side is billed via CacheCreationInputTokens (full rate)
// and CacheReadInputTokens (discounted rate); InputTokens itself is not
// used here.
func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
	cost := s.Model.Cost

	// TODO: check this for correctness, i am skeptical
	// Calculate cost in cents
	megaCents := u.CacheCreationInputTokens*cost.Input +
		u.CacheReadInputTokens*cost.CachedInput +
		u.OutputTokens*cost.Output

	cents := float64(megaCents) / 1_000_000
	// Convert to dollars
	dollars := cents / 100.0
	// fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
	return dollars
}
600
601// Do sends a request to OpenAI using the go-openai package.
602func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
603 // Configure the OpenAI client
604 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
605 model := cmp.Or(s.Model, DefaultModel)
606
607 // TODO: do this one during Service setup? maybe with a constructor instead?
608 config := openai.DefaultConfig(s.APIKey)
609 if model.URL != "" {
610 config.BaseURL = model.URL
611 }
612 if s.Org != "" {
613 config.OrgID = s.Org
614 }
615 config.HTTPClient = httpc
616
617 client := openai.NewClientWithConfig(config)
618
619 // Start with system messages if provided
620 var allMessages []openai.ChatCompletionMessage
621 if len(ir.System) > 0 {
622 sysMessages := fromLLMSystem(ir.System)
623 allMessages = append(allMessages, sysMessages...)
624 }
625
626 // Add regular and tool messages
627 for _, msg := range ir.Messages {
628 msgs := fromLLMMessage(msg)
629 allMessages = append(allMessages, msgs...)
630 }
631
632 // Convert tools
633 var tools []openai.Tool
634 for _, t := range ir.Tools {
635 tools = append(tools, fromLLMTool(t))
636 }
637
638 // Create the OpenAI request
639 req := openai.ChatCompletionRequest{
640 Model: model.ModelName,
641 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700642 Tools: tools,
643 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
644 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700645 if model.IsReasoningModel {
646 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
647 } else {
648 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
649 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700650 // fmt.Printf("Sending request to OpenAI\n")
651 // enc := json.NewEncoder(os.Stdout)
652 // enc.SetIndent("", " ")
653 // enc.Encode(req)
654 // fmt.Printf("\n")
655
656 // Retry mechanism
657 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
658
659 // retry loop
660 for attempts := 0; ; attempts++ {
661 resp, err := client.CreateChatCompletion(ctx, req)
662
663 // Handle successful response
664 if err == nil {
665 return s.toLLMResponse(&resp), nil
666 }
667
668 // Handle errors
669 var apiErr *openai.APIError
670 if ok := errors.As(err, &apiErr); !ok {
671 // Not an OpenAI API error, return immediately
672 return nil, err
673 }
674
675 switch {
676 case apiErr.HTTPStatusCode >= 500:
677 // Server error, try again with backoff
678 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
679 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
680 time.Sleep(sleep)
681 continue
682
683 case apiErr.HTTPStatusCode == 429:
684 // Rate limited, back off longer
685 sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
686 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
687 time.Sleep(sleep)
688 continue
689
690 default:
691 // Other error, return immediately
692 return nil, fmt.Errorf("OpenAI API error: %w", err)
693 }
694 }
695}