blob: c561095c7593f4ae9327c0850c31c5ccb78fd62a [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070012 "strings"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070013 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "sketch.dev/llm"
17)
18
const (
	// DefaultMaxTokens is the completion-token budget used when
	// Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for the OpenAI-compatible chat-completion endpoints
	// this package knows how to talk to.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"
	MoonshotURL  = "https://api.moonshot.ai/v1"

	// Environment variable names for API keys, one per provider.
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
	MoonshotAPIKeyEnv  = "MOONSHOT_API_KEY"
)
38
// Model identifies a chat-completion model together with the provider
// endpoint that serves it and the environment variable holding its API key.
type Model struct {
	UserName         string // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string // base URL of the provider's OpenAI-compatible API; may be empty when resolved elsewhere (see Qwen)
	APIKeyEnv        string // environment variable name for the API key
	IsReasoningModel bool   // whether this model is a reasoning model (e.g. O3, O4-mini)
}
46
// Known models, keyed below in ModelsRegistry by their UserName.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// GRRRR. Really??
		// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
		// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
		// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
		// Whatever that means. Are we caching? I have no idea.
		// How do you always manage to be the annoying one, Google?
		// I'm not complicating things just for you.
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherDeepseekR1 = Model{
		UserName:  "together-deepseek-r1",
		ModelName: "deepseek-ai/DeepSeek-R1",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	// LlamaCPP talks to a locally running llama.cpp server; no API key needed.
	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// NOTE(review): ModelName "moonshot-v1-auto" lets Moonshot auto-select a
	// model; confirm this actually maps to Kimi K2 as the UserName suggests.
	MoonshotKimiK2 = Model{
		UserName:  "moonshot-kimi-k2",
		ModelName: "moonshot-v1-auto",
		URL:       MoonshotURL,
		APIKeyEnv: MoonshotAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	DevstralSmall = Model{
		UserName:  "devstral-small",
		ModelName: "devstral-small-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	Qwen3CoderFireworks = Model{
		UserName:  "qwen3-coder-fireworks",
		ModelName: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// Qwen is a skaband-specific model name for Qwen3-Coder
	// Provider details (URL and APIKeyEnv) are handled by skaband
	Qwen = Model{
		UserName:  "qwen",
		ModelName: "qwen", // skaband will map this to the actual provider model
	}
)
226
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	ModelURL  string       // optional, overrides Model.URL
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
}
238
// Compile-time check that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)

// ModelsRegistry is a registry of all known models with their user-friendly names.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherDeepseekR1,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	MoonshotKimiK2,
	FireworksLlama4Maverick,
	MistralMedium,
	DevstralSmall,
	Qwen3CoderFireworks,
	Qwen,
}
268
269// ListModels returns a list of all available models with their user-friendly names.
270func ListModels() []string {
271 var names []string
272 for _, model := range ModelsRegistry {
273 if model.UserName != "" {
274 names = append(names, model.UserName)
275 }
276 }
277 return names
278}
279
280// ModelByUserName returns a model by its user-friendly name.
281// Returns nil if no model with the given name is found.
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000282func ModelByUserName(name string) Model {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700283 for _, model := range ModelsRegistry {
284 if model.UserName == name {
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000285 return model
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700286 }
287 }
Josh Bleecher Snyder0530da02025-07-23 03:47:43 +0000288 return Model{}
289}
290
291func (m Model) IsZero() bool {
292 return m == Model{}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700293}
294
// Lookup tables translating between llm package enums and the strings used
// by the OpenAI-compatible wire format.
var (
	// fromLLMRole maps llm message roles to OpenAI role strings.
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	// fromLLMToolChoiceType maps llm tool-choice types to OpenAI strings.
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	// toLLMRole is the inverse of fromLLMRole.
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	// toLLMStopReason maps OpenAI finish reasons to llm stop reasons.
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
318
319// fromLLMContent converts llm.Content to the format expected by OpenAI.
320func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
321 switch c.Type {
322 case llm.ContentTypeText:
323 return c.Text, nil
324 case llm.ContentTypeToolUse:
325 // For OpenAI, tool use is sent as a null content with tool_calls in the message
326 return "", []openai.ToolCall{
327 {
328 Type: openai.ToolTypeFunction,
329 ID: c.ID, // Use the content ID if provided
330 Function: openai.FunctionCall{
331 Name: c.ToolName,
332 Arguments: string(c.ToolInput),
333 },
334 },
335 }
336 case llm.ContentTypeToolResult:
337 // Tool results in OpenAI are sent as a separate message with tool_call_id
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700338 // OpenAI doesn't support multiple content items or images in tool results
339 // Combine all text content into a single string
340 var resultText string
341 if len(c.ToolResult) > 0 {
342 // Collect all text from content objects
343 texts := make([]string, 0, len(c.ToolResult))
344 for _, result := range c.ToolResult {
345 if result.Text != "" {
346 texts = append(texts, result.Text)
347 }
348 }
349 resultText = strings.Join(texts, "\n")
350 }
351 return resultText, nil
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700352 default:
353 // For thinking or other types, convert to text
354 return c.Text, nil
355 }
356}
357
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message can expand to several OpenAI messages: each tool result
// becomes its own message with role="tool", and any remaining (regular)
// content becomes one final message carrying text and/or tool calls.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	// For OpenAI, we need to handle tool results differently than regular messages
	// Each tool result becomes its own message with role="tool"

	var messages []openai.ChatCompletionMessage

	// Partition the content into tool results and everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Process tool results first, as separate role="tool" messages.
	for _, tr := range toolResults {
		// Convert the tool-result array to a single string for OpenAI:
		// collect all non-blank text from the content objects.
		var texts []string
		for _, result := range tr.ToolResult {
			if strings.TrimSpace(result.Text) != "" {
				texts = append(texts, result.Text)
			}
		}
		toolResultContent := strings.Join(texts, "\n")

		// OpenAI doesn't have an explicit error field for tool results, so add it directly to the content.
		if tr.ToolError {
			if toolResultContent != "" {
				toolResultContent = "error: " + toolResultContent
			} else {
				toolResultContent = "error: tool execution failed"
			}
		}

		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Process regular content second
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// For assistant messages that contain tool calls
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text parts with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
435
436// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
437func fromLLMToolChoice(tc *llm.ToolChoice) any {
438 if tc == nil {
439 return nil
440 }
441
442 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
443 return openai.ToolChoice{
444 Type: openai.ToolTypeFunction,
445 Function: openai.ToolFunction{
446 Name: tc.Name,
447 },
448 }
449 }
450
451 // For non-specific tool choice, just use the string
452 return fromLLMToolChoiceType[tc.Type]
453}
454
455// fromLLMTool converts llm.Tool to the format expected by OpenAI.
456func fromLLMTool(t *llm.Tool) openai.Tool {
457 return openai.Tool{
458 Type: openai.ToolTypeFunction,
459 Function: &openai.FunctionDefinition{
460 Name: t.Name,
461 Description: t.Description,
462 Parameters: t.InputSchema,
463 },
464 }
465}
466
467// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
468func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
469 if len(systemContent) == 0 {
470 return nil
471 }
472
473 // Combine all system content into a single system message
474 var systemText string
475 for i, content := range systemContent {
476 if i > 0 && systemText != "" && content.Text != "" {
477 systemText += "\n"
478 }
479 systemText += content.Text
480 }
481
482 if systemText == "" {
483 return nil
484 }
485
486 return []openai.ChatCompletionMessage{
487 {
488 Role: "system",
489 Content: systemText,
490 },
491 }
492}
493
494// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
495func toRawLLMContent(content string) llm.Content {
496 return llm.Content{
497 Type: llm.ContentTypeText,
498 Text: content,
499 }
500}
501
502// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
503func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
504 // Generate a content ID if needed
505 id := toolCall.ID
506 if id == "" {
507 // Create a deterministic ID based on the function name if no ID is provided
508 id = "tc_" + toolCall.Function.Name
509 }
510
511 return llm.Content{
512 ID: id,
513 Type: llm.ContentTypeToolUse,
514 ToolName: toolCall.Function.Name,
515 ToolInput: json.RawMessage(toolCall.Function.Arguments),
516 }
517}
518
519// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
520func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
521 return llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700522 Type: llm.ContentTypeToolResult,
523 ToolUseID: msg.ToolCallID,
524 ToolResult: []llm.Content{{
525 Type: llm.ContentTypeText,
526 Text: msg.Content,
527 }},
Josh Bleecher Snyder40c9da82025-07-24 21:08:20 +0000528 ToolError: false, // OpenAI doesn't specify errors explicitly; error information is parsed from content
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700529 }
530}
531
532// toLLMContents converts message content from OpenAI to []llm.Content.
533func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
534 var contents []llm.Content
535
536 // If this is a tool response, handle it separately
537 if msg.Role == "tool" && msg.ToolCallID != "" {
538 return []llm.Content{toToolResultLLMContent(msg)}
539 }
540
541 // If there's text content, add it
542 if msg.Content != "" {
543 contents = append(contents, toRawLLMContent(msg.Content))
544 }
545
546 // If there are tool calls, add them
547 for _, tc := range msg.ToolCalls {
548 contents = append(contents, toToolCallLLMContent(tc))
549 }
550
551 // If empty, add an empty text content
552 if len(contents) == 0 {
553 contents = append(contents, llm.Content{
554 Type: llm.ContentTypeText,
555 Text: "",
556 })
557 }
558
559 return contents
560}
561
// toLLMUsage converts usage information from OpenAI to llm.Usage,
// attaching the cost derived from the response headers.
func (s *Service) toLLMUsage(au openai.Usage, headers http.Header) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens)
	// Cached prompt tokens, when the provider reports them.
	var inc uint64
	if au.PromptTokensDetails != nil {
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): this charges the entire prompt as cache-creation
		// tokens; if llm.Usage follows Anthropic-style accounting, this
		// likely should be 0 (or in-inc) for OpenAI's automatic caching —
		// confirm against llm.Usage's cost model.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
580
581// toLLMResponse converts the OpenAI response to llm.Response.
582func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
583 // fmt.Printf("Raw response\n")
584 // enc := json.NewEncoder(os.Stdout)
585 // enc.SetIndent("", " ")
586 // enc.Encode(r)
587 // fmt.Printf("\n")
588
589 if len(r.Choices) == 0 {
590 return &llm.Response{
591 ID: r.ID,
592 Model: r.Model,
593 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700594 Usage: s.toLLMUsage(r.Usage, r.Header()),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700595 }
596 }
597
598 // Process the primary choice
599 choice := r.Choices[0]
600
601 return &llm.Response{
602 ID: r.ID,
603 Model: r.Model,
604 Role: toRoleFromString(choice.Message.Role),
605 Content: toLLMContents(choice.Message),
606 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder59bb27d2025-06-05 07:32:10 -0700607 Usage: s.toLLMUsage(r.Usage, r.Header()),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700608 }
609}
610
611// toRoleFromString converts a role string to llm.MessageRole.
612func toRoleFromString(role string) llm.MessageRole {
613 if role == "tool" || role == "system" || role == "function" {
614 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
615 }
616 if mr, ok := toLLMRole[role]; ok {
617 return mr
618 }
619 return llm.MessageRoleUser // Default to user if unknown
620}
621
622// toStopReason converts a finish reason string to llm.StopReason.
623func toStopReason(reason string) llm.StopReason {
624 if sr, ok := toLLMStopReason[reason]; ok {
625 return sr
626 }
627 return llm.StopReasonStopSequence // Default
628}
629
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700630// TokenContextWindow returns the maximum token context window size for this service
631func (s *Service) TokenContextWindow() int {
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000632 // TODO: move TokenContextWindow information to Model struct
633
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700634 model := cmp.Or(s.Model, DefaultModel)
635
636 // OpenAI models generally have 128k context windows
637 // Some newer models have larger windows, but 128k is a safe default
638 switch model.ModelName {
639 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
640 return 200000 // 200k for newer GPT-4.1 models
641 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
642 return 128000 // 128k for GPT-4o models
643 case "o3-2025-04-16", "o3-mini-2025-04-16":
644 return 200000 // 200k for O3 models
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000645 case "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct":
646 return 256000 // 256k native context for Qwen3-Coder
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000647 case "qwen":
648 return 256000 // 256k native context for Qwen3-Coder
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700649 default:
650 // Default for unknown models
651 return 128000
652 }
653}
654
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700655// Do sends a request to OpenAI using the go-openai package.
656func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
657 // Configure the OpenAI client
658 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
659 model := cmp.Or(s.Model, DefaultModel)
660
661 // TODO: do this one during Service setup? maybe with a constructor instead?
662 config := openai.DefaultConfig(s.APIKey)
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000663 if modelURLOverride := cmp.Or(s.ModelURL, model.URL); modelURLOverride != "" {
664 config.BaseURL = modelURLOverride
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700665 }
666 if s.Org != "" {
667 config.OrgID = s.Org
668 }
669 config.HTTPClient = httpc
670
671 client := openai.NewClientWithConfig(config)
672
673 // Start with system messages if provided
674 var allMessages []openai.ChatCompletionMessage
675 if len(ir.System) > 0 {
676 sysMessages := fromLLMSystem(ir.System)
677 allMessages = append(allMessages, sysMessages...)
678 }
679
680 // Add regular and tool messages
681 for _, msg := range ir.Messages {
682 msgs := fromLLMMessage(msg)
683 allMessages = append(allMessages, msgs...)
684 }
685
686 // Convert tools
687 var tools []openai.Tool
688 for _, t := range ir.Tools {
689 tools = append(tools, fromLLMTool(t))
690 }
691
692 // Create the OpenAI request
693 req := openai.ChatCompletionRequest{
694 Model: model.ModelName,
695 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700696 Tools: tools,
697 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
698 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700699 if model.IsReasoningModel {
700 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
701 } else {
702 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
703 }
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700704 // Dump request if enabled
705 if s.DumpLLM {
706 if reqJSON, err := json.MarshalIndent(req, "", " "); err == nil {
707 // Construct the chat completions URL
708 baseURL := cmp.Or(model.URL, OpenAIURL)
709 url := baseURL + "/chat/completions"
710 if err := llm.DumpToFile("request", url, reqJSON); err != nil {
711 slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
712 }
713 }
714 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700715
716 // Retry mechanism
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000717 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700718
719 // retry loop
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000720 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700721 for attempts := 0; ; attempts++ {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000722 if attempts > 10 {
723 return nil, fmt.Errorf("openai request failed after %d attempts: %w", attempts, errs)
724 }
725 if attempts > 0 {
726 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
727 slog.WarnContext(ctx, "openai request sleep before retry", "sleep", sleep, "attempts", attempts)
728 time.Sleep(sleep)
729 }
730
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700731 resp, err := client.CreateChatCompletion(ctx, req)
732
733 // Handle successful response
734 if err == nil {
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700735 // Dump response if enabled
736 if s.DumpLLM {
737 if respJSON, jsonErr := json.MarshalIndent(resp, "", " "); jsonErr == nil {
738 if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
739 slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
740 }
741 }
742 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700743 return s.toLLMResponse(&resp), nil
744 }
745
746 // Handle errors
crawshaw5c861652025-07-29 16:34:52 +0000747 // Check for TLS "bad record MAC" errors and retry once
748 if strings.Contains(err.Error(), "tls: bad record MAC") && attempts == 0 {
749 slog.WarnContext(ctx, "tls bad record MAC error, retrying once", "error", err.Error())
750 errs = errors.Join(errs, fmt.Errorf("TLS error (attempt %d): %w", attempts+1, err))
751 continue
752 }
753
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700754 var apiErr *openai.APIError
755 if ok := errors.As(err, &apiErr); !ok {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000756 // Not an OpenAI API error, return immediately with accumulated errors
757 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700758 }
759
760 switch {
761 case apiErr.HTTPStatusCode >= 500:
762 // Server error, try again with backoff
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000763 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
764 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700765 continue
766
767 case apiErr.HTTPStatusCode == 429:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000768 // Rate limited, accumulate error and retry
769 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error())
770 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited): %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700771 continue
772
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000773 case apiErr.HTTPStatusCode >= 400 && apiErr.HTTPStatusCode < 500:
774 // Client error, probably unrecoverable
775 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
776 return nil, errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
777
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700778 default:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000779 // Other error, accumulate and retry
780 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
781 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
782 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700783 }
784 }
785}