blob: 4b046b008efec70211fde035a3713af1de390471 [file] [log] [blame]
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -07001package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
12 "time"
13
14 "github.com/sashabaranov/go-openai"
15 "sketch.dev/llm"
16)
17
const (
	// DefaultMaxTokens is the completion token budget used when
	// Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs of the supported OpenAI-compatible chat completion endpoints.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	LlamaCPPURL  = "http://localhost:8080/v1" // local llama.cpp server
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
)
35
// Model describes a chat completion model offered by an OpenAI-compatible
// provider: how users refer to it, how the provider names it, where to
// reach it, what it costs, and which env var holds its API key.
type Model struct {
	UserName         string    // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName        string    // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL              string    // base URL of the provider's OpenAI-compatible API
	Cost             ModelCost // token pricing used to estimate request cost
	APIKeyEnv        string    // environment variable name for the API key
	IsReasoningModel bool      // whether this model is a reasoning model (e.g. O3, O4-mini)
}
44
// ModelCost holds a model's pricing. All values are expressed in cents
// per million tokens so they stay integral.
type ModelCost struct {
	Input       uint64 // in cents per million tokens
	CachedInput uint64 // in cents per million tokens (discounted rate for cached prompt tokens)
	Output      uint64 // in cents per million tokens
}
50
// All known models. Costs are in cents per million tokens; some values
// are rounded up where providers quote fractional cents.
var (
	DefaultModel = GPT41

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 200, CachedInput: 50, Output: 800},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 250, CachedInput: 125, Output: 1000},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 15, CachedInput: 8, Output: 60}, // CachedInput is actually 7.5; rounded up
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 40, CachedInput: 10, Output: 160},
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		Cost:      ModelCost{Input: 10, CachedInput: 3, Output: 40}, // CachedInput is actually 2.5; rounded up
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 1000, CachedInput: 250, Output: 4000},
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		Cost:             ModelCost{Input: 110, CachedInput: 28, Output: 440}, // CachedInput is actually 27.5; rounded up
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		Cost:      ModelCost{Input: 15, Output: 60},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// Gemini 2.5 Pro pricing is tiered by prompt size:
		//   Input:  $1.25 for prompts <= 200k tokens, $2.50 above.
		//   Output: $10.00 for prompts <= 200k tokens, $15.00 above.
		//   Caching: $0.31 (<= 200k) / $0.625 (> 200k), plus $4.50 per
		//   million tokens per hour of cache storage.
		// We deliberately model only the small-prompt tier and ignore
		// caching rather than complicate the cost model.
		Cost:      ModelCost{Input: 125, Output: 1000},
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 125, Output: 125},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 27, Output: 85},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 22, Output: 88},
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 88, Output: 88},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 20, Output: 60},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		Cost:      ModelCost{Input: 80, Output: 80},
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		// zero cost: runs locally, no API key required
		Cost: ModelCost{},
	}

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		Cost:      ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		Cost:      ModelCost{Input: 40, Output: 200},
		APIKeyEnv: MistralAPIKeyEnv,
	}
)
215
// Service provides chat completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}
225
226var _ llm.Service = (*Service)(nil)
227
Philip Zeyliger022b3632025-05-10 06:14:21 -0700228func (s *Service) ModelName() string {
229 return s.Model.UserName
230}
231
// ModelsRegistry is a registry of all known models with their user-friendly names.
// ListModels and ModelByUserName iterate over this slice.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	FireworksLlama4Maverick,
	MistralMedium,
}
254
255// ListModels returns a list of all available models with their user-friendly names.
256func ListModels() []string {
257 var names []string
258 for _, model := range ModelsRegistry {
259 if model.UserName != "" {
260 names = append(names, model.UserName)
261 }
262 }
263 return names
264}
265
266// ModelByUserName returns a model by its user-friendly name.
267// Returns nil if no model with the given name is found.
268func ModelByUserName(name string) *Model {
269 for _, model := range ModelsRegistry {
270 if model.UserName == name {
271 return &model
272 }
273 }
274 return nil
275}
276
277var (
278 fromLLMRole = map[llm.MessageRole]string{
279 llm.MessageRoleAssistant: "assistant",
280 llm.MessageRoleUser: "user",
281 }
282 fromLLMContentType = map[llm.ContentType]string{
283 llm.ContentTypeText: "text",
284 llm.ContentTypeToolUse: "function", // OpenAI uses function instead of tool_call
285 llm.ContentTypeToolResult: "tool_result",
286 llm.ContentTypeThinking: "text", // Map thinking to text since OpenAI doesn't have thinking
287 llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
288 }
289 fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
290 llm.ToolChoiceTypeAuto: "auto",
291 llm.ToolChoiceTypeAny: "any",
292 llm.ToolChoiceTypeNone: "none",
293 llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
294 }
295 toLLMRole = map[string]llm.MessageRole{
296 "assistant": llm.MessageRoleAssistant,
297 "user": llm.MessageRoleUser,
298 }
299 toLLMStopReason = map[string]llm.StopReason{
300 "stop": llm.StopReasonStopSequence,
301 "length": llm.StopReasonMaxTokens,
302 "tool_calls": llm.StopReasonToolUse,
303 "function_call": llm.StopReasonToolUse, // Map both to ToolUse
304 "content_filter": llm.StopReasonStopSequence, // No direct equivalent
305 }
306)
307
308// fromLLMContent converts llm.Content to the format expected by OpenAI.
309func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
310 switch c.Type {
311 case llm.ContentTypeText:
312 return c.Text, nil
313 case llm.ContentTypeToolUse:
314 // For OpenAI, tool use is sent as a null content with tool_calls in the message
315 return "", []openai.ToolCall{
316 {
317 Type: openai.ToolTypeFunction,
318 ID: c.ID, // Use the content ID if provided
319 Function: openai.FunctionCall{
320 Name: c.ToolName,
321 Arguments: string(c.ToolInput),
322 },
323 },
324 }
325 case llm.ContentTypeToolResult:
326 // Tool results in OpenAI are sent as a separate message with tool_call_id
327 return c.ToolResult, nil
328 default:
329 // For thinking or other types, convert to text
330 return c.Text, nil
331 }
332}
333
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
// One llm.Message may expand into several OpenAI messages: each tool result
// becomes its own message with role="tool", followed by at most one message
// holding the remaining text content and tool calls.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	// For OpenAI, we need to handle tool results differently than regular messages.
	// Each tool result becomes its own message with role="tool".

	var messages []openai.ChatCompletionMessage

	// Partition the content into tool results and everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, one message per result.
	for _, tr := range toolResults {
		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Process regular content second.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// Assistant messages may carry tool calls alongside text.
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text fragments with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
392
393// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
394func fromLLMToolChoice(tc *llm.ToolChoice) any {
395 if tc == nil {
396 return nil
397 }
398
399 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
400 return openai.ToolChoice{
401 Type: openai.ToolTypeFunction,
402 Function: openai.ToolFunction{
403 Name: tc.Name,
404 },
405 }
406 }
407
408 // For non-specific tool choice, just use the string
409 return fromLLMToolChoiceType[tc.Type]
410}
411
412// fromLLMTool converts llm.Tool to the format expected by OpenAI.
413func fromLLMTool(t *llm.Tool) openai.Tool {
414 return openai.Tool{
415 Type: openai.ToolTypeFunction,
416 Function: &openai.FunctionDefinition{
417 Name: t.Name,
418 Description: t.Description,
419 Parameters: t.InputSchema,
420 },
421 }
422}
423
424// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
425func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
426 if len(systemContent) == 0 {
427 return nil
428 }
429
430 // Combine all system content into a single system message
431 var systemText string
432 for i, content := range systemContent {
433 if i > 0 && systemText != "" && content.Text != "" {
434 systemText += "\n"
435 }
436 systemText += content.Text
437 }
438
439 if systemText == "" {
440 return nil
441 }
442
443 return []openai.ChatCompletionMessage{
444 {
445 Role: "system",
446 Content: systemText,
447 },
448 }
449}
450
451// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
452func toRawLLMContent(content string) llm.Content {
453 return llm.Content{
454 Type: llm.ContentTypeText,
455 Text: content,
456 }
457}
458
459// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
460func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
461 // Generate a content ID if needed
462 id := toolCall.ID
463 if id == "" {
464 // Create a deterministic ID based on the function name if no ID is provided
465 id = "tc_" + toolCall.Function.Name
466 }
467
468 return llm.Content{
469 ID: id,
470 Type: llm.ContentTypeToolUse,
471 ToolName: toolCall.Function.Name,
472 ToolInput: json.RawMessage(toolCall.Function.Arguments),
473 }
474}
475
476// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
477func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
478 return llm.Content{
479 Type: llm.ContentTypeToolResult,
480 ToolUseID: msg.ToolCallID,
481 ToolResult: msg.Content,
482 ToolError: false, // OpenAI doesn't specify errors explicitly
483 }
484}
485
486// toLLMContents converts message content from OpenAI to []llm.Content.
487func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
488 var contents []llm.Content
489
490 // If this is a tool response, handle it separately
491 if msg.Role == "tool" && msg.ToolCallID != "" {
492 return []llm.Content{toToolResultLLMContent(msg)}
493 }
494
495 // If there's text content, add it
496 if msg.Content != "" {
497 contents = append(contents, toRawLLMContent(msg.Content))
498 }
499
500 // If there are tool calls, add them
501 for _, tc := range msg.ToolCalls {
502 contents = append(contents, toToolCallLLMContent(tc))
503 }
504
505 // If empty, add an empty text content
506 if len(contents) == 0 {
507 contents = append(contents, llm.Content{
508 Type: llm.ContentTypeText,
509 Text: "",
510 })
511 }
512
513 return contents
514}
515
516// toLLMUsage converts usage information from OpenAI to llm.Usage.
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700517func (s *Service) toLLMUsage(au openai.Usage) llm.Usage {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700518 // fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
519 in := uint64(au.PromptTokens)
520 var inc uint64
521 if au.PromptTokensDetails != nil {
522 inc = uint64(au.PromptTokensDetails.CachedTokens)
523 }
524 out := uint64(au.CompletionTokens)
525 u := llm.Usage{
526 InputTokens: in,
527 CacheReadInputTokens: inc,
528 CacheCreationInputTokens: in,
529 OutputTokens: out,
530 }
531 u.CostUSD = s.calculateCostFromTokens(u)
532 return u
533}
534
535// toLLMResponse converts the OpenAI response to llm.Response.
536func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
537 // fmt.Printf("Raw response\n")
538 // enc := json.NewEncoder(os.Stdout)
539 // enc.SetIndent("", " ")
540 // enc.Encode(r)
541 // fmt.Printf("\n")
542
543 if len(r.Choices) == 0 {
544 return &llm.Response{
545 ID: r.ID,
546 Model: r.Model,
547 Role: llm.MessageRoleAssistant,
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700548 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700549 }
550 }
551
552 // Process the primary choice
553 choice := r.Choices[0]
554
555 return &llm.Response{
556 ID: r.ID,
557 Model: r.Model,
558 Role: toRoleFromString(choice.Message.Role),
559 Content: toLLMContents(choice.Message),
560 StopReason: toStopReason(string(choice.FinishReason)),
Josh Bleecher Snyder66439b02025-05-02 18:35:32 -0700561 Usage: s.toLLMUsage(r.Usage),
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700562 }
563}
564
565// toRoleFromString converts a role string to llm.MessageRole.
566func toRoleFromString(role string) llm.MessageRole {
567 if role == "tool" || role == "system" || role == "function" {
568 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
569 }
570 if mr, ok := toLLMRole[role]; ok {
571 return mr
572 }
573 return llm.MessageRoleUser // Default to user if unknown
574}
575
576// toStopReason converts a finish reason string to llm.StopReason.
577func toStopReason(reason string) llm.StopReason {
578 if sr, ok := toLLMStopReason[reason]; ok {
579 return sr
580 }
581 return llm.StopReasonStopSequence // Default
582}
583
// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
//
// ModelCost values are cents per million tokens, so the sum below is in
// "mega-cents" and is divided by 1e6 (to cents) and then 100 (to dollars).
// Note the input side is billed via CacheCreationInputTokens (full rate)
// and CacheReadInputTokens (discounted rate); InputTokens itself is not
// used here.
func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
	cost := s.Model.Cost

	// TODO: check this for correctness, i am skeptical
	// Calculate cost in cents
	megaCents := u.CacheCreationInputTokens*cost.Input +
		u.CacheReadInputTokens*cost.CachedInput +
		u.OutputTokens*cost.Output

	cents := float64(megaCents) / 1_000_000
	// Convert to dollars
	dollars := cents / 100.0
	// fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
	return dollars
}
600
601// Do sends a request to OpenAI using the go-openai package.
602func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
603 // Configure the OpenAI client
604 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
605 model := cmp.Or(s.Model, DefaultModel)
606
607 // TODO: do this one during Service setup? maybe with a constructor instead?
608 config := openai.DefaultConfig(s.APIKey)
609 if model.URL != "" {
610 config.BaseURL = model.URL
611 }
612 if s.Org != "" {
613 config.OrgID = s.Org
614 }
615 config.HTTPClient = httpc
616
617 client := openai.NewClientWithConfig(config)
618
619 // Start with system messages if provided
620 var allMessages []openai.ChatCompletionMessage
621 if len(ir.System) > 0 {
622 sysMessages := fromLLMSystem(ir.System)
623 allMessages = append(allMessages, sysMessages...)
624 }
625
626 // Add regular and tool messages
627 for _, msg := range ir.Messages {
628 msgs := fromLLMMessage(msg)
629 allMessages = append(allMessages, msgs...)
630 }
631
632 // Convert tools
633 var tools []openai.Tool
634 for _, t := range ir.Tools {
635 tools = append(tools, fromLLMTool(t))
636 }
637
638 // Create the OpenAI request
639 req := openai.ChatCompletionRequest{
640 Model: model.ModelName,
641 Messages: allMessages,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700642 Tools: tools,
643 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
644 }
Josh Bleecher Snyder8236cbc2025-05-09 09:57:57 -0700645 if model.IsReasoningModel {
646 req.MaxCompletionTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
647 } else {
648 req.MaxTokens = cmp.Or(s.MaxTokens, DefaultMaxTokens)
649 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700650 // fmt.Printf("Sending request to OpenAI\n")
651 // enc := json.NewEncoder(os.Stdout)
652 // enc.SetIndent("", " ")
653 // enc.Encode(req)
654 // fmt.Printf("\n")
655
656 // Retry mechanism
657 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
658
659 // retry loop
660 for attempts := 0; ; attempts++ {
661 resp, err := client.CreateChatCompletion(ctx, req)
662
663 // Handle successful response
664 if err == nil {
665 return s.toLLMResponse(&resp), nil
666 }
667
668 // Handle errors
669 var apiErr *openai.APIError
670 if ok := errors.As(err, &apiErr); !ok {
671 // Not an OpenAI API error, return immediately
672 return nil, err
673 }
674
675 switch {
676 case apiErr.HTTPStatusCode >= 500:
677 // Server error, try again with backoff
678 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
679 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
680 time.Sleep(sleep)
681 continue
682
683 case apiErr.HTTPStatusCode == 429:
684 // Rate limited, back off longer
685 sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
686 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
687 time.Sleep(sleep)
688 continue
689
690 default:
691 // Other error, return immediately
692 return nil, fmt.Errorf("OpenAI API error: %w", err)
693 }
694 }
695}