blob: 6a32a745bc341c08acf322d1ebca1a93b2234fce [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
Philip Zeyliger72252cb2025-05-10 17:00:08 -070012 "strings"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070013 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "sketch.dev/llm"
17)
18
// Defaults and per-provider endpoint configuration for OpenAI-compatible
// chat completion APIs.
const (
	// DefaultMaxTokens is used when Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs for each supported OpenAI-compatible provider.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1"
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"
	MoonshotURL  = "https://api.moonshot.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
	MoonshotAPIKeyEnv  = "MOONSHOT_API_KEY"
)
38
// Model describes one LLM offering: how users refer to it, how the provider
// names it, where to reach it, and which behavioral quirks apply.
type Model struct {
	UserName           string // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName          string // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL                string // base URL of the provider's OpenAI-compatible endpoint
	APIKeyEnv          string // environment variable name for the API key
	IsReasoningModel   bool   // whether this model is a reasoning model (e.g. O3, O4-mini); selects MaxCompletionTokens over MaxTokens
	UseSimplifiedPatch bool   // whether to use the simplified patch input schema; defaults to false
}
47
// Known model definitions. DefaultModel is used when Service.Model is the
// zero value. Every model intended for user selection must also appear in
// ModelsRegistry below.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// Gemini 2.5 Pro has tiered pricing: input/output rates increase for
		// prompts over 200k tokens, and cached tokens are billed separately
		// (including a per-hour storage component). Those tiers are
		// deliberately not modeled here.
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherDeepseekR1 = Model{
		UserName:  "together-deepseek-r1",
		ModelName: "deepseek-ai/DeepSeek-R1",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// No APIKeyEnv: a local llama.cpp server requires no API key.
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MoonshotKimiK2 = Model{
		UserName: "moonshot-kimi-k2",
		// NOTE(review): "moonshot-v1-auto" is Moonshot's generic auto-routing
		// model name, not a kimi-k2-specific identifier — confirm this is
		// intentional.
		ModelName: "moonshot-v1-auto",
		URL:       MoonshotURL,
		APIKeyEnv: MoonshotAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	DevstralSmall = Model{
		UserName:  "devstral-small",
		ModelName: "devstral-small-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	Qwen3CoderFireworks = Model{
		UserName:           "qwen3-coder-fireworks",
		ModelName:          "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
		URL:                FireworksURL,
		APIKeyEnv:          FireworksAPIKeyEnv,
		UseSimplifiedPatch: true,
	}

	// Qwen is a skaband-specific model name for Qwen3-Coder.
	// Provider details (URL and APIKeyEnv) are handled by skaband.
	Qwen = Model{
		UserName:           "qwen",
		ModelName:          "qwen", // skaband will map this to the actual provider model
		UseSimplifiedPatch: true,
	}
)
229
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	ModelURL  string       // optional, overrides Model.URL
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
}

// Compile-time assertion that *Service satisfies llm.Service.
var _ llm.Service = (*Service)(nil)
243
// ModelsRegistry is a registry of all known models with their user-friendly names.
// ListModels and ModelByUserName operate over this slice, in this order.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherDeepseekR1,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	MoonshotKimiK2,
	FireworksLlama4Maverick,
	MistralMedium,
	DevstralSmall,
	Qwen3CoderFireworks,
	Qwen,
}
271
272// ListModels returns a list of all available models with their user-friendly names.
273func ListModels() []string {
274 var names []string
275 for _, model := range ModelsRegistry {
276 if model.UserName != "" {
277 names = append(names, model.UserName)
278 }
279 }
280 return names
281}
282
// ModelByUserName returns a model by its user-friendly name.
// Returns the zero Model (detectable via Model.IsZero) if no model with the
// given name is found. (Note: the function returns a value, not a pointer, so
// it can never return nil.)
func ModelByUserName(name string) Model {
	for _, model := range ModelsRegistry {
		if model.UserName == name {
			return model
		}
	}
	return Model{}
}
293
294func (m Model) IsZero() bool {
295 return m == Model{}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700296}
297
// Translation tables between llm-package enums and the wire strings used by
// the OpenAI chat completions API.
var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
321
322// fromLLMContent converts llm.Content to the format expected by OpenAI.
323func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
324 switch c.Type {
325 case llm.ContentTypeText:
326 return c.Text, nil
327 case llm.ContentTypeToolUse:
328 // For OpenAI, tool use is sent as a null content with tool_calls in the message
329 return "", []openai.ToolCall{
330 {
331 Type: openai.ToolTypeFunction,
332 ID: c.ID, // Use the content ID if provided
333 Function: openai.FunctionCall{
334 Name: c.ToolName,
335 Arguments: string(c.ToolInput),
336 },
337 },
338 }
339 case llm.ContentTypeToolResult:
340 // Tool results in OpenAI are sent as a separate message with tool_call_id
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700341 // OpenAI doesn't support multiple content items or images in tool results
342 // Combine all text content into a single string
343 var resultText string
344 if len(c.ToolResult) > 0 {
345 // Collect all text from content objects
346 texts := make([]string, 0, len(c.ToolResult))
347 for _, result := range c.ToolResult {
348 if result.Text != "" {
349 texts = append(texts, result.Text)
350 }
351 }
352 resultText = strings.Join(texts, "\n")
353 }
354 return resultText, nil
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700355 default:
356 // For thinking or other types, convert to text
357 return c.Text, nil
358 }
359}
360
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// One llm.Message may expand to several OpenAI messages: OpenAI requires
// each tool result to be its own message with role "tool" and a
// tool_call_id, while all remaining (non-tool-result) content collapses into
// a single message in the original role. Tool-result messages are emitted
// before the regular-content message.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	var messages []openai.ChatCompletionMessage

	// Partition the content: tool results vs. everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, one role="tool" message per result.
	for _, tr := range toolResults {
		// OpenAI tool results carry a single string, so flatten the
		// tool-result content objects into newline-joined text, skipping
		// whitespace-only parts.
		var texts []string
		for _, result := range tr.ToolResult {
			if strings.TrimSpace(result.Text) != "" {
				texts = append(texts, result.Text)
			}
		}
		toolResultContent := strings.Join(texts, "\n")

		// OpenAI doesn't have an explicit error field for tool results, so add it directly to the content.
		if tr.ToolError {
			if toolResultContent != "" {
				toolResultContent = "error: " + toolResultContent
			} else {
				toolResultContent = "error: tool execution failed"
			}
		}

		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Then collapse the regular content into one message in the original role.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Assistant messages may mix text with tool calls; gather both.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text parts with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
438
439// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
440func fromLLMToolChoice(tc *llm.ToolChoice) any {
441 if tc == nil {
442 return nil
443 }
444
445 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
446 return openai.ToolChoice{
447 Type: openai.ToolTypeFunction,
448 Function: openai.ToolFunction{
449 Name: tc.Name,
450 },
451 }
452 }
453
454 // For non-specific tool choice, just use the string
455 return fromLLMToolChoiceType[tc.Type]
456}
457
458// fromLLMTool converts llm.Tool to the format expected by OpenAI.
459func fromLLMTool(t *llm.Tool) openai.Tool {
460 return openai.Tool{
461 Type: openai.ToolTypeFunction,
462 Function: &openai.FunctionDefinition{
463 Name: t.Name,
464 Description: t.Description,
465 Parameters: t.InputSchema,
466 },
467 }
468}
469
470// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
471func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
472 if len(systemContent) == 0 {
473 return nil
474 }
475
476 // Combine all system content into a single system message
477 var systemText string
478 for i, content := range systemContent {
479 if i > 0 && systemText != "" && content.Text != "" {
480 systemText += "\n"
481 }
482 systemText += content.Text
483 }
484
485 if systemText == "" {
486 return nil
487 }
488
489 return []openai.ChatCompletionMessage{
490 {
491 Role: "system",
492 Content: systemText,
493 },
494 }
495}
496
497// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
498func toRawLLMContent(content string) llm.Content {
499 return llm.Content{
500 Type: llm.ContentTypeText,
501 Text: content,
502 }
503}
504
505// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
506func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
507 // Generate a content ID if needed
508 id := toolCall.ID
509 if id == "" {
510 // Create a deterministic ID based on the function name if no ID is provided
511 id = "tc_" + toolCall.Function.Name
512 }
513
514 return llm.Content{
515 ID: id,
516 Type: llm.ContentTypeToolUse,
517 ToolName: toolCall.Function.Name,
518 ToolInput: json.RawMessage(toolCall.Function.Arguments),
519 }
520}
521
522// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
523func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
524 return llm.Content{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700525 Type: llm.ContentTypeToolResult,
526 ToolUseID: msg.ToolCallID,
527 ToolResult: []llm.Content{{
528 Type: llm.ContentTypeText,
529 Text: msg.Content,
530 }},
Josh Bleecher Snyder40c9da82025-07-24 21:08:20 +0000531 ToolError: false, // OpenAI doesn't specify errors explicitly; error information is parsed from content
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700532 }
533}
534
535// toLLMContents converts message content from OpenAI to []llm.Content.
536func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
537 var contents []llm.Content
538
539 // If this is a tool response, handle it separately
540 if msg.Role == "tool" && msg.ToolCallID != "" {
541 return []llm.Content{toToolResultLLMContent(msg)}
542 }
543
544 // If there's text content, add it
545 if msg.Content != "" {
546 contents = append(contents, toRawLLMContent(msg.Content))
547 }
548
549 // If there are tool calls, add them
550 for _, tc := range msg.ToolCalls {
551 contents = append(contents, toToolCallLLMContent(tc))
552 }
553
554 // If empty, add an empty text content
555 if len(contents) == 0 {
556 contents = append(contents, llm.Content{
557 Type: llm.ContentTypeText,
558 Text: "",
559 })
560 }
561
562 return contents
563}
564
// toLLMUsage converts usage information from OpenAI to llm.Usage, reading
// cached-token counts from PromptTokensDetails when present and the request
// cost from the response headers.
func (s *Service) toLLMUsage(au openai.Usage, headers http.Header) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens)
	var inc uint64
	if au.PromptTokensDetails != nil {
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): CacheCreationInputTokens is set to the TOTAL prompt
		// token count, not a cache-write count (OpenAI does not report one).
		// Confirm downstream accounting expects this.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
583
// toLLMResponse converts the OpenAI response to llm.Response.
// Only the first choice is used; any additional choices are ignored. When the
// response carries no choices, a content-free assistant response with usage
// data is returned.
func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
	if len(r.Choices) == 0 {
		return &llm.Response{
			ID:    r.ID,
			Model: r.Model,
			Role:  llm.MessageRoleAssistant,
			Usage: s.toLLMUsage(r.Usage, r.Header()),
		}
	}

	// Process the primary choice
	choice := r.Choices[0]

	return &llm.Response{
		ID:         r.ID,
		Model:      r.Model,
		Role:       toRoleFromString(choice.Message.Role),
		Content:    toLLMContents(choice.Message),
		StopReason: toStopReason(string(choice.FinishReason)),
		Usage:      s.toLLMUsage(r.Usage, r.Header()),
	}
}
613
614// toRoleFromString converts a role string to llm.MessageRole.
615func toRoleFromString(role string) llm.MessageRole {
616 if role == "tool" || role == "system" || role == "function" {
617 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
618 }
619 if mr, ok := toLLMRole[role]; ok {
620 return mr
621 }
622 return llm.MessageRoleUser // Default to user if unknown
623}
624
625// toStopReason converts a finish reason string to llm.StopReason.
626func toStopReason(reason string) llm.StopReason {
627 if sr, ok := toLLMStopReason[reason]; ok {
628 return sr
629 }
630 return llm.StopReasonStopSequence // Default
631}
632
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700633// TokenContextWindow returns the maximum token context window size for this service
634func (s *Service) TokenContextWindow() int {
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000635 // TODO: move TokenContextWindow information to Model struct
636
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700637 model := cmp.Or(s.Model, DefaultModel)
638
639 // OpenAI models generally have 128k context windows
640 // Some newer models have larger windows, but 128k is a safe default
641 switch model.ModelName {
642 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
643 return 200000 // 200k for newer GPT-4.1 models
644 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
645 return 128000 // 128k for GPT-4o models
646 case "o3-2025-04-16", "o3-mini-2025-04-16":
647 return 200000 // 200k for O3 models
Josh Bleecher Snyderab3702c2025-07-24 20:22:50 +0000648 case "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct":
649 return 256000 // 256k native context for Qwen3-Coder
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000650 case "qwen":
651 return 256000 // 256k native context for Qwen3-Coder
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700652 default:
653 // Default for unknown models
654 return 128000
655 }
656}
657
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700658// Do sends a request to OpenAI using the go-openai package.
659func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
660 // Configure the OpenAI client
661 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
662 model := cmp.Or(s.Model, DefaultModel)
663
664 // TODO: do this one during Service setup? maybe with a constructor instead?
665 config := openai.DefaultConfig(s.APIKey)
Josh Bleecher Snyderd1c1ace2025-07-29 00:16:27 +0000666 if modelURLOverride := cmp.Or(s.ModelURL, model.URL); modelURLOverride != "" {
667 config.BaseURL = modelURLOverride
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700668 }
669 if s.Org != "" {
670 config.OrgID = s.Org
671 }
672 config.HTTPClient = httpc
673
674 client := openai.NewClientWithConfig(config)
675
676 // Start with system messages if provided
677 var allMessages []openai.ChatCompletionMessage
678 if len(ir.System) > 0 {
679 sysMessages := fromLLMSystem(ir.System)
680 allMessages = append(allMessages, sysMessages...)
681 }
682
683 // Add regular and tool messages
684 for _, msg := range ir.Messages {
685 msgs := fromLLMMessage(msg)
686 allMessages = append(allMessages, msgs...)
687 }
688
689 // Convert tools
690 var tools []openai.Tool
691 for _, t := range ir.Tools {
692 tools = append(tools, fromLLMTool(t))
693 }
694
695 // Create the OpenAI request
696 req := openai.ChatCompletionRequest{
697 Model: model.ModelName,
698 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700699 Tools: tools,
700 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
701 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700702 if model.IsReasoningModel {
703 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
704 } else {
705 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
706 }
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700707 // Dump request if enabled
708 if s.DumpLLM {
709 if reqJSON, err := json.MarshalIndent(req, "", " "); err == nil {
710 // Construct the chat completions URL
711 baseURL := cmp.Or(model.URL, OpenAIURL)
712 url := baseURL + "/chat/completions"
713 if err := llm.DumpToFile("request", url, reqJSON); err != nil {
714 slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
715 }
716 }
717 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700718
719 // Retry mechanism
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000720 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700721
722 // retry loop
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000723 var errs error // accumulated errors across all attempts
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700724 for attempts := 0; ; attempts++ {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000725 if attempts > 10 {
726 return nil, fmt.Errorf("openai request failed after %d attempts: %w", attempts, errs)
727 }
728 if attempts > 0 {
729 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
730 slog.WarnContext(ctx, "openai request sleep before retry", "sleep", sleep, "attempts", attempts)
731 time.Sleep(sleep)
732 }
733
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700734 resp, err := client.CreateChatCompletion(ctx, req)
735
736 // Handle successful response
737 if err == nil {
Josh Bleecher Snyder57afbca2025-07-23 13:29:59 -0700738 // Dump response if enabled
739 if s.DumpLLM {
740 if respJSON, jsonErr := json.MarshalIndent(resp, "", " "); jsonErr == nil {
741 if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
742 slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
743 }
744 }
745 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700746 return s.toLLMResponse(&resp), nil
747 }
748
749 // Handle errors
crawshaw5c861652025-07-29 16:34:52 +0000750 // Check for TLS "bad record MAC" errors and retry once
751 if strings.Contains(err.Error(), "tls: bad record MAC") && attempts == 0 {
752 slog.WarnContext(ctx, "tls bad record MAC error, retrying once", "error", err.Error())
753 errs = errors.Join(errs, fmt.Errorf("TLS error (attempt %d): %w", attempts+1, err))
754 continue
755 }
756
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700757 var apiErr *openai.APIError
758 if ok := errors.As(err, &apiErr); !ok {
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000759 // Not an OpenAI API error, return immediately with accumulated errors
760 return nil, errors.Join(errs, err)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700761 }
762
763 switch {
764 case apiErr.HTTPStatusCode >= 500:
765 // Server error, try again with backoff
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000766 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
767 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700768 continue
769
770 case apiErr.HTTPStatusCode == 429:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000771 // Rate limited, accumulate error and retry
772 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error())
773 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited): %s", apiErr.HTTPStatusCode, apiErr.Error()))
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700774 continue
775
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000776 case apiErr.HTTPStatusCode >= 400 && apiErr.HTTPStatusCode < 500:
777 // Client error, probably unrecoverable
778 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
779 return nil, errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
780
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700781 default:
Josh Bleecher Snyder38411992025-05-16 17:51:03 +0000782 // Other error, accumulate and retry
783 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode)
784 errs = errors.Join(errs, fmt.Errorf("status %d: %s", apiErr.HTTPStatusCode, apiErr.Error()))
785 continue
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700786 }
787 }
788}
Josh Bleecher Snyder994e9842025-07-30 20:26:47 -0700789
// UseSimplifiedPatch reports whether the configured model should use the
// simplified patch input schema (see Model.UseSimplifiedPatch).
func (s *Service) UseSimplifiedPatch() bool {
	return s.Model.UseSimplifiedPatch
}