blob: 8b64157e30fea81a4060784bd6bcc02bc2602584 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
12 "time"
13
14 "github.com/sashabaranov/go-openai"
15 "sketch.dev/llm"
16)
17
// Default request limits and the base URLs / API-key environment variables
// for the OpenAI-compatible providers this package knows about.
const (
	// DefaultMaxTokens is the completion-token limit used when Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for OpenAI-compatible chat completion endpoints.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
)
35
// Model describes one model available through an OpenAI-compatible API:
// how the user names it, how the provider names it, where to reach it,
// and what it costs.
type Model struct {
	UserName         string    // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string    // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string    // base URL of the provider's OpenAI-compatible endpoint
	Cost             ModelCost // pricing used to estimate per-request cost
	APIKeyEnv        string    // environment variable name for the API key
	IsReasoningModel bool      // whether this model is a reasoning model (e.g. O3, O4-mini)
}
44
// ModelCost holds per-token pricing for a model.
// All fields are expressed in cents per million tokens.
type ModelCost struct {
	Input       uint64 // in cents per million tokens
	CachedInput uint64 // in cents per million tokens
	Output      uint64 // in cents per million tokens
}
50
// Known models. DefaultModel is what Service uses when Model is the zero value.
// Prices come from the providers' published rate cards; some are rounded up
// (noted inline) because ModelCost stores integer cents per million tokens.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 200, CachedInput: 50, Output: 800},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 250, CachedInput: 125, Output: 1000},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 15, CachedInput: 8, Output: 60}, // 8 is actually 7.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 40, CachedInput: 10, Output: 160},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 10, CachedInput: 3, Output: 40}, // 3 is actually 2.5 GRRR round up for now oh well
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 1000, CachedInput: 250, Output: 4000},
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 110, CachedInput: 28, Output: 440}, // 28 is actually 27.5 GRRR round up for now oh well
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		Cost:      ModelCost{Input: 15, Output: 60},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// GRRRR. Really??
		// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
		// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
		// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
		// Whatever that means. Are we caching? I have no idea.
		// How do you always manage to be the annoying one, Google?
		// I'm not complicating things just for you.
		// NOTE(review): only the <= 200k-token tier is modeled here.
		Cost:      ModelCost{Input: 125, Output: 1000},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 125, Output: 125},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 27, Output: 85},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 22, Output: 88},
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 88, Output: 88},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 20, Output: 60},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// zero cost: runs locally, no API key needed (APIKeyEnv left empty)
		Cost: ModelCost{},
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		Cost:      ModelCost{Input: 40, Output: 200},
		APIKeyEnv: MistralAPIKeyEnv,
	}
)
215
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
// The zero value is usable: it targets DefaultModel with DefaultMaxTokens
// via http.DefaultClient.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}

// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)
227
// ModelsRegistry is a registry of all known models with their user-friendly names.
// Keep it in sync with the Model definitions above; ListModels and
// ModelByUserName consult this slice.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	FireworksLlama4Maverick,
	MistralMedium,
}
250
251// ListModels returns a list of all available models with their user-friendly names.
252func ListModels() []string {
253 var names []string
254 for _, model := range ModelsRegistry {
255 if model.UserName != "" {
256 names = append(names, model.UserName)
257 }
258 }
259 return names
260}
261
262// ModelByUserName returns a model by its user-friendly name.
263// Returns nil if no model with the given name is found.
264func ModelByUserName(name string) *Model {
265 for _, model := range ModelsRegistry {
266 if model.UserName == name {
267 return &model
268 }
269 }
270 return nil
271}
272
// Translation tables between llm-package enums and the strings used on the
// wire by the OpenAI chat completions API.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeToolUse:          "function", // OpenAI uses function instead of tool_call
		llm.ContentTypeToolResult:       "tool_result",
		llm.ContentTypeThinking:         "text", // Map thinking to text since OpenAI doesn't have thinking
		llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
	}
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
303
304// fromLLMContent converts llm.Content to the format expected by OpenAI.
305func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
306 switch c.Type {
307 case llm.ContentTypeText:
308 return c.Text, nil
309 case llm.ContentTypeToolUse:
310 // For OpenAI, tool use is sent as a null content with tool_calls in the message
311 return "", []openai.ToolCall{
312 {
313 Type: openai.ToolTypeFunction,
314 ID: c.ID, // Use the content ID if provided
315 Function: openai.FunctionCall{
316 Name: c.ToolName,
317 Arguments: string(c.ToolInput),
318 },
319 },
320 }
321 case llm.ContentTypeToolResult:
322 // Tool results in OpenAI are sent as a separate message with tool_call_id
323 return c.ToolResult, nil
324 default:
325 // For thinking or other types, convert to text
326 return c.Text, nil
327 }
328}
329
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message may expand into several OpenAI messages: each tool result
// must be its own message with role="tool" and a tool_call_id, while the
// remaining content (text plus tool calls) is collapsed into a single
// message with the original role.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	var messages []openai.ChatCompletionMessage

	// Partition the content: tool results vs. everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, each as its own role="tool" message.
	for _, tr := range toolResults {
		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Then emit the remaining content as one message, if any.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Accumulate tool calls and newline-joined text separately.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
388
389// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
390func fromLLMToolChoice(tc *llm.ToolChoice) any {
391 if tc == nil {
392 return nil
393 }
394
395 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
396 return openai.ToolChoice{
397 Type: openai.ToolTypeFunction,
398 Function: openai.ToolFunction{
399 Name: tc.Name,
400 },
401 }
402 }
403
404 // For non-specific tool choice, just use the string
405 return fromLLMToolChoiceType[tc.Type]
406}
407
408// fromLLMTool converts llm.Tool to the format expected by OpenAI.
409func fromLLMTool(t *llm.Tool) openai.Tool {
410 return openai.Tool{
411 Type: openai.ToolTypeFunction,
412 Function: &openai.FunctionDefinition{
413 Name: t.Name,
414 Description: t.Description,
415 Parameters: t.InputSchema,
416 },
417 }
418}
419
420// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
421func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
422 if len(systemContent) == 0 {
423 return nil
424 }
425
426 // Combine all system content into a single system message
427 var systemText string
428 for i, content := range systemContent {
429 if i > 0 && systemText != "" && content.Text != "" {
430 systemText += "\n"
431 }
432 systemText += content.Text
433 }
434
435 if systemText == "" {
436 return nil
437 }
438
439 return []openai.ChatCompletionMessage{
440 {
441 Role: "system",
442 Content: systemText,
443 },
444 }
445}
446
447// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
448func toRawLLMContent(content string) llm.Content {
449 return llm.Content{
450 Type: llm.ContentTypeText,
451 Text: content,
452 }
453}
454
455// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
456func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
457 // Generate a content ID if needed
458 id := toolCall.ID
459 if id == "" {
460 // Create a deterministic ID based on the function name if no ID is provided
461 id = "tc_" + toolCall.Function.Name
462 }
463
464 return llm.Content{
465 ID: id,
466 Type: llm.ContentTypeToolUse,
467 ToolName: toolCall.Function.Name,
468 ToolInput: json.RawMessage(toolCall.Function.Arguments),
469 }
470}
471
472// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
473func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
474 return llm.Content{
475 Type: llm.ContentTypeToolResult,
476 ToolUseID: msg.ToolCallID,
477 ToolResult: msg.Content,
478 ToolError: false, // OpenAI doesn't specify errors explicitly
479 }
480}
481
482// toLLMContents converts message content from OpenAI to []llm.Content.
483func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
484 var contents []llm.Content
485
486 // If this is a tool response, handle it separately
487 if msg.Role == "tool" && msg.ToolCallID != "" {
488 return []llm.Content{toToolResultLLMContent(msg)}
489 }
490
491 // If there's text content, add it
492 if msg.Content != "" {
493 contents = append(contents, toRawLLMContent(msg.Content))
494 }
495
496 // If there are tool calls, add them
497 for _, tc := range msg.ToolCalls {
498 contents = append(contents, toToolCallLLMContent(tc))
499 }
500
501 // If empty, add an empty text content
502 if len(contents) == 0 {
503 contents = append(contents, llm.Content{
504 Type: llm.ContentTypeText,
505 Text: "",
506 })
507 }
508
509 return contents
510}
511
512// toLLMUsage converts usage information from OpenAI to llm.Usage.
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700513func (s *Service) toLLMUsage(au openai.Usage) llm.Usage {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700514 // fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
515 in := uint64(au.PromptTokens)
516 var inc uint64
517 if au.PromptTokensDetails != nil {
518 inc = uint64(au.PromptTokensDetails.CachedTokens)
519 }
520 out := uint64(au.CompletionTokens)
521 u := llm.Usage{
522 InputTokens: in,
523 CacheReadInputTokens: inc,
524 CacheCreationInputTokens: in,
525 OutputTokens: out,
526 }
527 u.CostUSD = s.calculateCostFromTokens(u)
528 return u
529}
530
531// toLLMResponse converts the OpenAI response to llm.Response.
532func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
533 // fmt.Printf("Raw response\n")
534 // enc := json.NewEncoder(os.Stdout)
535 // enc.SetIndent("", " ")
536 // enc.Encode(r)
537 // fmt.Printf("\n")
538
539 if len(r.Choices) == 0 {
540 return &llm.Response{
541 ID: r.ID,
542 Model: r.Model,
543 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700544 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700545 }
546 }
547
548 // Process the primary choice
549 choice := r.Choices[0]
550
551 return &llm.Response{
552 ID: r.ID,
553 Model: r.Model,
554 Role: toRoleFromString(choice.Message.Role),
555 Content: toLLMContents(choice.Message),
556 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700557 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700558 }
559}
560
561// toRoleFromString converts a role string to llm.MessageRole.
562func toRoleFromString(role string) llm.MessageRole {
563 if role == "tool" || role == "system" || role == "function" {
564 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
565 }
566 if mr, ok := toLLMRole[role]; ok {
567 return mr
568 }
569 return llm.MessageRoleUser // Default to user if unknown
570}
571
572// toStopReason converts a finish reason string to llm.StopReason.
573func toStopReason(reason string) llm.StopReason {
574 if sr, ok := toLLMStopReason[reason]; ok {
575 return sr
576 }
577 return llm.StopReasonStopSequence // Default
578}
579
// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
// ModelCost fields are in cents per million tokens, so each product below is
// in "mega-cents" until divided by 1e6 (to cents) and then by 100 (to dollars).
func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
	cost := s.Model.Cost

	// TODO: check this for correctness, i am skeptical
	// NOTE(review): this bills CacheCreationInputTokens at the full input rate
	// and CacheReadInputTokens at the cached rate. Confirm that the caller
	// populates CacheCreationInputTokens with only the *uncached* portion of
	// the prompt; if it holds the full prompt count, cached tokens are
	// effectively charged twice.
	// Calculate cost in cents
	megaCents := u.CacheCreationInputTokens*cost.Input +
		u.CacheReadInputTokens*cost.CachedInput +
		u.OutputTokens*cost.Output

	cents := float64(megaCents) / 1_000_000
	// Convert to dollars
	dollars := cents / 100.0
	// fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
	return dollars
}
596
597// Do sends a request to OpenAI using the go-openai package.
598func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
599 // Configure the OpenAI client
600 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
601 model := cmp.Or(s.Model, DefaultModel)
602
603 // TODO: do this one during Service setup? maybe with a constructor instead?
604 config := openai.DefaultConfig(s.APIKey)
605 if model.URL != "" {
606 config.BaseURL = model.URL
607 }
608 if s.Org != "" {
609 config.OrgID = s.Org
610 }
611 config.HTTPClient = httpc
612
613 client := openai.NewClientWithConfig(config)
614
615 // Start with system messages if provided
616 var allMessages []openai.ChatCompletionMessage
617 if len(ir.System) > 0 {
618 sysMessages := fromLLMSystem(ir.System)
619 allMessages = append(allMessages, sysMessages...)
620 }
621
622 // Add regular and tool messages
623 for _, msg := range ir.Messages {
624 msgs := fromLLMMessage(msg)
625 allMessages = append(allMessages, msgs...)
626 }
627
628 // Convert tools
629 var tools []openai.Tool
630 for _, t := range ir.Tools {
631 tools = append(tools, fromLLMTool(t))
632 }
633
634 // Create the OpenAI request
635 req := openai.ChatCompletionRequest{
636 Model: model.ModelName,
637 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700638 Tools: tools,
639 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
640 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700641 if model.IsReasoningModel {
642 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
643 } else {
644 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
645 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700646 // fmt.Printf("Sending request to OpenAI\n")
647 // enc := json.NewEncoder(os.Stdout)
648 // enc.SetIndent("", " ")
649 // enc.Encode(req)
650 // fmt.Printf("\n")
651
652 // Retry mechanism
653 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
654
655 // retry loop
656 for attempts := 0; ; attempts++ {
657 resp, err := client.CreateChatCompletion(ctx, req)
658
659 // Handle successful response
660 if err == nil {
661 return s.toLLMResponse(&resp), nil
662 }
663
664 // Handle errors
665 var apiErr *openai.APIError
666 if ok := errors.As(err, &apiErr); !ok {
667 // Not an OpenAI API error, return immediately
668 return nil, err
669 }
670
671 switch {
672 case apiErr.HTTPStatusCode >= 500:
673 // Server error, try again with backoff
674 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
675 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
676 time.Sleep(sleep)
677 continue
678
679 case apiErr.HTTPStatusCode == 429:
680 // Rate limited, back off longer
681 sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
682 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
683 time.Sleep(sleep)
684 continue
685
686 default:
687 // Other error, return immediately
688 return nil, fmt.Errorf("OpenAI API error: %w", err)
689 }
690 }
691}