blob: 37484e0ae7c3a9f864946527f5e277093743bb3f [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070012 "strings"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070013 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "sketch.dev/llm"
17)
18
const (
	// DefaultMaxTokens is the completion token limit used when
	// Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs of the supported OpenAI-compatible API endpoints.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1"
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
)
36
// Model describes a chat model available through an OpenAI-compatible
// provider: how users refer to it, how the provider names it, where to
// reach it, and what it costs.
type Model struct {
	UserName         string    // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string    // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string    // base URL of the provider's OpenAI-compatible API
	Cost             ModelCost // pricing, in cents per million tokens
	APIKeyEnv        string    // environment variable name for the API key
	IsReasoningModel bool      // whether this model is a reasoning model (e.g. O3, O4-mini)
}
45
// ModelCost holds a model's pricing. All fields are denominated in cents
// per million tokens; fractional-cent provider prices are rounded up.
type ModelCost struct {
	Input       uint64 // in cents per million tokens
	CachedInput uint64 // in cents per million tokens, for cached prompt tokens
	Output      uint64 // in cents per million tokens
}
51
// All known models. Costs come from the providers' published pricing
// pages; several are rounded up where providers price in fractional cents.
var (
	// DefaultModel is used by Service when no Model is configured.
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 200, CachedInput: 50, Output: 800},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 250, CachedInput: 125, Output: 1000},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 15, CachedInput: 8, Output: 60}, // 8 is actually 7.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 40, CachedInput: 10, Output: 160},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 10, CachedInput: 3, Output: 40}, // 3 is actually 2.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 1000, CachedInput: 250, Output: 4000},
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 110, CachedInput: 28, Output: 440}, // 28 is actually 27.5 GRRR round up for now oh well
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		Cost:      ModelCost{Input: 15, Output: 60},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// GRRRR. Really??
		// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
		// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
		// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
		// Whatever that means. Are we caching? I have no idea.
		// How do you always manage to be the annoying one, Google?
		// I'm not complicating things just for you.
		// We use only the <= 200k-token prices here.
		Cost:      ModelCost{Input: 125, Output: 1000},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 125, Output: 125},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 27, Output: 85},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 22, Output: 88},
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 88, Output: 88},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 20, Output: 60},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// zero cost: runs locally, no API key required
		Cost: ModelCost{},
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		Cost:      ModelCost{Input: 40, Output: 200},
		APIKeyEnv: MistralAPIKeyEnv,
	}
)
216
// Service provides chat completions via an OpenAI-compatible API.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}
226
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)

// ModelsRegistry is a registry of all known models with their user-friendly names.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	FireworksLlama4Maverick,
	MistralMedium,
}
251
252// ListModels returns a list of all available models with their user-friendly names.
253func ListModels() []string {
254 var names []string
255 for _, model := range ModelsRegistry {
256 if model.UserName != "" {
257 names = append(names, model.UserName)
258 }
259 }
260 return names
261}
262
263// ModelByUserName returns a model by its user-friendly name.
264// Returns nil if no model with the given name is found.
265func ModelByUserName(name string) *Model {
266 for _, model := range ModelsRegistry {
267 if model.UserName == name {
268 return &model
269 }
270 }
271 return nil
272}
273
274var (
275 fromLLMRole = map[llm.MessageRole]string{
276 llm.MessageRoleAssistant: "assistant",
277 llm.MessageRoleUser: "user",
278 }
279 fromLLMContentType = map[llm.ContentType]string{
280 llm.ContentTypeText: "text",
281 llm.ContentTypeToolUse: "function", // OpenAI uses function instead of tool_call
282 llm.ContentTypeToolResult: "tool_result",
283 llm.ContentTypeThinking: "text", // Map thinking to text since OpenAI doesn't have thinking
284 llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
285 }
286 fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
287 llm.ToolChoiceTypeAuto: "auto",
288 llm.ToolChoiceTypeAny: "any",
289 llm.ToolChoiceTypeNone: "none",
290 llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
291 }
292 toLLMRole = map[string]llm.MessageRole{
293 "assistant": llm.MessageRoleAssistant,
294 "user": llm.MessageRoleUser,
295 }
296 toLLMStopReason = map[string]llm.StopReason{
297 "stop": llm.StopReasonStopSequence,
298 "length": llm.StopReasonMaxTokens,
299 "tool_calls": llm.StopReasonToolUse,
300 "function_call": llm.StopReasonToolUse, // Map both to ToolUse
301 "content_filter": llm.StopReasonStopSequence, // No direct equivalent
302 }
303)
304
305// fromLLMContent converts llm.Content to the format expected by OpenAI.
306func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
307 switch c.Type {
308 case llm.ContentTypeText:
309 return c.Text, nil
310 case llm.ContentTypeToolUse:
311 // For OpenAI, tool use is sent as a null content with tool_calls in the message
312 return "", []openai.ToolCall{
313 {
314 Type: openai.ToolTypeFunction,
315 ID: c.ID, // Use the content ID if provided
316 Function: openai.FunctionCall{
317 Name: c.ToolName,
318 Arguments: string(c.ToolInput),
319 },
320 },
321 }
322 case llm.ContentTypeToolResult:
323 // Tool results in OpenAI are sent as a separate message with tool_call_id
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700324 // OpenAI doesn't support multiple content items or images in tool results
325 // Combine all text content into a single string
326 var resultText string
327 if len(c.ToolResult) > 0 {
328 // Collect all text from content objects
329 texts := make([]string, 0, len(c.ToolResult))
330 for _, result := range c.ToolResult {
331 if result.Text != "" {
332 texts = append(texts, result.Text)
333 }
334 }
335 resultText = strings.Join(texts, "\n")
336 }
337 return resultText, nil
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700338 default:
339 // For thinking or other types, convert to text
340 return c.Text, nil
341 }
342}
343
344// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format
345func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
346 // For OpenAI, we need to handle tool results differently than regular messages
347 // Each tool result becomes its own message with role="tool"
348
349 var messages []openai.ChatCompletionMessage
350
351 // Check if this is a regular message or contains tool results
352 var regularContent []llm.Content
353 var toolResults []llm.Content
354
355 for _, c := range msg.Content {
356 if c.Type == llm.ContentTypeToolResult {
357 toolResults = append(toolResults, c)
358 } else {
359 regularContent = append(regularContent, c)
360 }
361 }
362
363 // Process tool results as separate messages, but first
364 for _, tr := range toolResults {
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700365 // Convert toolresult array to a string for OpenAI
366 var toolResultContent string
367 if len(tr.ToolResult) > 0 {
368 // For now, just use the first text content in the array
369 toolResultContent = tr.ToolResult[0].Text
370 }
371
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700372 m := openai.ChatCompletionMessage{
373 Role: "tool",
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700374 Content: cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700375 ToolCallID: tr.ToolUseID,
376 }
377 messages = append(messages, m)
378 }
379 // Process regular content second
380 if len(regularContent) > 0 {
381 m := openai.ChatCompletionMessage{
382 Role: fromLLMRole[msg.Role],
383 }
384
385 // For assistant messages that contain tool calls
386 var toolCalls []openai.ToolCall
387 var textContent string
388
389 for _, c := range regularContent {
390 content, tools := fromLLMContent(c)
391 if len(tools) > 0 {
392 toolCalls = append(toolCalls, tools...)
393 } else if content != "" {
394 if textContent != "" {
395 textContent += "\n"
396 }
397 textContent += content
398 }
399 }
400
401 m.Content = textContent
402 m.ToolCalls = toolCalls
403
404 messages = append(messages, m)
405 }
406
407 return messages
408}
409
410// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
411func fromLLMToolChoice(tc *llm.ToolChoice) any {
412 if tc == nil {
413 return nil
414 }
415
416 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
417 return openai.ToolChoice{
418 Type: openai.ToolTypeFunction,
419 Function: openai.ToolFunction{
420 Name: tc.Name,
421 },
422 }
423 }
424
425 // For non-specific tool choice, just use the string
426 return fromLLMToolChoiceType[tc.Type]
427}
428
429// fromLLMTool converts llm.Tool to the format expected by OpenAI.
430func fromLLMTool(t *llm.Tool) openai.Tool {
431 return openai.Tool{
432 Type: openai.ToolTypeFunction,
433 Function: &openai.FunctionDefinition{
434 Name: t.Name,
435 Description: t.Description,
436 Parameters: t.InputSchema,
437 },
438 }
439}
440
441// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
442func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
443 if len(systemContent) == 0 {
444 return nil
445 }
446
447 // Combine all system content into a single system message
448 var systemText string
449 for i, content := range systemContent {
450 if i > 0 && systemText != "" && content.Text != "" {
451 systemText += "\n"
452 }
453 systemText += content.Text
454 }
455
456 if systemText == "" {
457 return nil
458 }
459
460 return []openai.ChatCompletionMessage{
461 {
462 Role: "system",
463 Content: systemText,
464 },
465 }
466}
467
468// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
469func toRawLLMContent(content string) llm.Content {
470 return llm.Content{
471 Type: llm.ContentTypeText,
472 Text: content,
473 }
474}
475
476// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
477func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
478 // Generate a content ID if needed
479 id := toolCall.ID
480 if id == "" {
481 // Create a deterministic ID based on the function name if no ID is provided
482 id = "tc_" + toolCall.Function.Name
483 }
484
485 return llm.Content{
486 ID: id,
487 Type: llm.ContentTypeToolUse,
488 ToolName: toolCall.Function.Name,
489 ToolInput: json.RawMessage(toolCall.Function.Arguments),
490 }
491}
492
493// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
494func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
495 return llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700496 Type: llm.ContentTypeToolResult,
497 ToolUseID: msg.ToolCallID,
498 ToolResult: []llm.Content{{
499 Type: llm.ContentTypeText,
500 Text: msg.Content,
501 }},
502 ToolError: false, // OpenAI doesn't specify errors explicitly
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700503 }
504}
505
506// toLLMContents converts message content from OpenAI to []llm.Content.
507func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
508 var contents []llm.Content
509
510 // If this is a tool response, handle it separately
511 if msg.Role == "tool" && msg.ToolCallID != "" {
512 return []llm.Content{toToolResultLLMContent(msg)}
513 }
514
515 // If there's text content, add it
516 if msg.Content != "" {
517 contents = append(contents, toRawLLMContent(msg.Content))
518 }
519
520 // If there are tool calls, add them
521 for _, tc := range msg.ToolCalls {
522 contents = append(contents, toToolCallLLMContent(tc))
523 }
524
525 // If empty, add an empty text content
526 if len(contents) == 0 {
527 contents = append(contents, llm.Content{
528 Type: llm.ContentTypeText,
529 Text: "",
530 })
531 }
532
533 return contents
534}
535
// toLLMUsage converts usage information from OpenAI to llm.Usage and
// computes the request's cost in USD.
//
// OpenAI's PromptTokens is the TOTAL prompt size, which already includes
// any cached tokens reported in PromptTokensDetails.CachedTokens.
func (s *Service) toLLMUsage(au openai.Usage) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens) // total prompt tokens (cached included)
	var inc uint64
	if au.PromptTokensDetails != nil {
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): this holds the TOTAL prompt tokens, not the
		// uncached remainder (in - inc); downstream cost math must
		// account for that or cached tokens get billed twice — confirm intent.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = s.calculateCostFromTokens(u)
	return u
}
554
555// toLLMResponse converts the OpenAI response to llm.Response.
556func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
557 // fmt.Printf("Raw response\n")
558 // enc := json.NewEncoder(os.Stdout)
559 // enc.SetIndent("", " ")
560 // enc.Encode(r)
561 // fmt.Printf("\n")
562
563 if len(r.Choices) == 0 {
564 return &llm.Response{
565 ID: r.ID,
566 Model: r.Model,
567 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700568 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700569 }
570 }
571
572 // Process the primary choice
573 choice := r.Choices[0]
574
575 return &llm.Response{
576 ID: r.ID,
577 Model: r.Model,
578 Role: toRoleFromString(choice.Message.Role),
579 Content: toLLMContents(choice.Message),
580 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700581 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700582 }
583}
584
585// toRoleFromString converts a role string to llm.MessageRole.
586func toRoleFromString(role string) llm.MessageRole {
587 if role == "tool" || role == "system" || role == "function" {
588 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
589 }
590 if mr, ok := toLLMRole[role]; ok {
591 return mr
592 }
593 return llm.MessageRoleUser // Default to user if unknown
594}
595
596// toStopReason converts a finish reason string to llm.StopReason.
597func toStopReason(reason string) llm.StopReason {
598 if sr, ok := toLLMStopReason[reason]; ok {
599 return sr
600 }
601 return llm.StopReasonStopSequence // Default
602}
603
604// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
605func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
606 cost := s.Model.Cost
607
608 // TODO: check this for correctness, i am skeptical
609 // Calculate cost in cents
610 megaCents := u.CacheCreationInputTokens*cost.Input +
611 u.CacheReadInputTokens*cost.CachedInput +
612 u.OutputTokens*cost.Output
613
614 cents := float64(megaCents) / 1_000_000
615 // Convert to dollars
616 dollars := cents / 100.0
617 // fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
618 return dollars
619}
620
621// Do sends a request to OpenAI using the go-openai package.
622func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
623 // Configure the OpenAI client
624 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
625 model := cmp.Or(s.Model, DefaultModel)
626
627 // TODO: do this one during Service setup? maybe with a constructor instead?
628 config := openai.DefaultConfig(s.APIKey)
629 if model.URL != "" {
630 config.BaseURL = model.URL
631 }
632 if s.Org != "" {
633 config.OrgID = s.Org
634 }
635 config.HTTPClient = httpc
636
637 client := openai.NewClientWithConfig(config)
638
639 // Start with system messages if provided
640 var allMessages []openai.ChatCompletionMessage
641 if len(ir.System) > 0 {
642 sysMessages := fromLLMSystem(ir.System)
643 allMessages = append(allMessages, sysMessages...)
644 }
645
646 // Add regular and tool messages
647 for _, msg := range ir.Messages {
648 msgs := fromLLMMessage(msg)
649 allMessages = append(allMessages, msgs...)
650 }
651
652 // Convert tools
653 var tools []openai.Tool
654 for _, t := range ir.Tools {
655 tools = append(tools, fromLLMTool(t))
656 }
657
658 // Create the OpenAI request
659 req := openai.ChatCompletionRequest{
660 Model: model.ModelName,
661 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700662 Tools: tools,
663 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
664 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700665 if model.IsReasoningModel {
666 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
667 } else {
668 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
669 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700670 // fmt.Printf("Sending request to OpenAI\n")
671 // enc := json.NewEncoder(os.Stdout)
672 // enc.SetIndent("", " ")
673 // enc.Encode(req)
674 // fmt.Printf("\n")
675
676 // Retry mechanism
677 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
678
679 // retry loop
680 for attempts := 0; ; attempts++ {
681 resp, err := client.CreateChatCompletion(ctx, req)
682
683 // Handle successful response
684 if err == nil {
685 return s.toLLMResponse(&resp), nil
686 }
687
688 // Handle errors
689 var apiErr *openai.APIError
690 if ok := errors.As(err, &apiErr); !ok {
691 // Not an OpenAI API error, return immediately
692 return nil, err
693 }
694
695 switch {
696 case apiErr.HTTPStatusCode >= 500:
697 // Server error, try again with backoff
698 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
699 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
700 time.Sleep(sleep)
701 continue
702
703 case apiErr.HTTPStatusCode == 429:
704 // Rate limited, back off longer
705 sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
706 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
707 time.Sleep(sleep)
708 continue
709
710 default:
711 // Other error, return immediately
712 return nil, fmt.Errorf("OpenAI API error: %w", err)
713 }
714 }
715}