Blame - llm/oai/oai.go - sketch

blob: 3e772ab4ac1c5b29c1f3fb5e669cf8a190ceb476 [file] [log] [blame]

Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame^]	1	package oai
				2
				3	import (
				4	"cmp"
				5	"context"
				6	"encoding/json"
				7	"errors"
				8	"fmt"
				9	"log/slog"
				10	"math/rand/v2"
				11	"net/http"
				12	"time"
				13
				14	"github.com/sashabaranov/go-openai"
				15	"sketch.dev/llm"
				16	)
				17
				18	const (
				19	DefaultMaxTokens = 8192
				20
				21	OpenAIURL = "https://api.openai.com/v1"
				22	FireworksURL = "https://api.fireworks.ai/inference/v1"
				23	LlamaCPPURL = "http://localhost:8080/v1"
				24	TogetherURL = "https://api.together.xyz/v1"
				25	GeminiURL = "https://generativelanguage.googleapis.com/v1beta/openai/"
				26
				27	// Environment variable names for API keys
				28	OpenAIAPIKeyEnv = "OPENAI_API_KEY"
				29	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
				30	TogetherAPIKeyEnv = "TOGETHER_API_KEY"
				31	GeminiAPIKeyEnv = "GEMINI_API_KEY"
				32	)
				33
				34	type Model struct {
				35	UserName string // provided by the user to identify this model (e.g. "gpt4.1")
				36	ModelName string // provided to the service provide to specify which model to use (e.g. "gpt-4.1-2025-04-14")
				37	URL string
				38	Cost ModelCost
				39	APIKeyEnv string // environment variable name for the API key
				40	}
				41
				42	type ModelCost struct {
				43	Input uint64 // in cents per million tokens
				44	CachedInput uint64 // in cents per million tokens
				45	Output uint64 // in cents per million tokens
				46	}
				47
				48	var (
				49	DefaultModel = GPT41
				50
				51	GPT41 = Model{
				52	UserName: "gpt4.1",
				53	ModelName: "gpt-4.1-2025-04-14",
				54	URL: OpenAIURL,
				55	Cost: ModelCost{Input: 200, CachedInput: 50, Output: 800},
				56	APIKeyEnv: OpenAIAPIKeyEnv,
				57	}
				58
				59	Gemini25Flash = Model{
				60	UserName: "gemini-flash-2.5",
				61	ModelName: "gemini-2.5-flash-preview-04-17",
				62	URL: GeminiURL,
				63	Cost: ModelCost{Input: 15, Output: 60},
				64	APIKeyEnv: GeminiAPIKeyEnv,
				65	}
				66
				67	Gemini25Pro = Model{
				68	UserName: "gemini-pro-2.5",
				69	ModelName: "gemini-2.5-pro-preview-03-25",
				70	URL: GeminiURL,
				71	// GRRRR. Really??
				72	// Input is: $1.25, prompts <= 200k tokens, $2.50, prompts > 200k tokens
				73	// Output is: $10.00, prompts <= 200k tokens, $15.00, prompts > 200k
				74	// Caching is: $0.31, prompts <= 200k tokens, $0.625, prompts > 200k, $4.50 / 1,000,000 tokens per hour
				75	// Whatever that means. Are we caching? I have no idea.
				76	// How do you always manage to be the annoying one, Google?
				77	// I'm not complicating things just for you.
				78	Cost: ModelCost{Input: 125, Output: 1000},
				79	APIKeyEnv: GeminiAPIKeyEnv,
				80	}
				81
				82	TogetherDeepseekV3 = Model{
				83	UserName: "together-deepseek-v3",
				84	ModelName: "deepseek-ai/DeepSeek-V3",
				85	URL: TogetherURL,
				86	Cost: ModelCost{Input: 125, Output: 125},
				87	APIKeyEnv: TogetherAPIKeyEnv,
				88	}
				89
				90	TogetherLlama4Maverick = Model{
				91	UserName: "together-llama4-maverick",
				92	ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
				93	URL: TogetherURL,
				94	Cost: ModelCost{Input: 27, Output: 85},
				95	APIKeyEnv: TogetherAPIKeyEnv,
				96	}
				97
				98	TogetherLlama3_3_70B = Model{
				99	UserName: "together-llama3-70b",
				100	ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
				101	URL: TogetherURL,
				102	Cost: ModelCost{Input: 88, Output: 88},
				103	APIKeyEnv: TogetherAPIKeyEnv,
				104	}
				105
				106	TogetherMistralSmall = Model{
				107	UserName: "together-mistral-small",
				108	ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
				109	URL: TogetherURL,
				110	Cost: ModelCost{Input: 80, Output: 80},
				111	APIKeyEnv: TogetherAPIKeyEnv,
				112	}
				113
				114	LlamaCPP = Model{
				115	UserName: "llama.cpp",
				116	ModelName: "llama.cpp local model",
				117	URL: LlamaCPPURL,
				118	// zero cost
				119	Cost: ModelCost{},
				120	}
				121
				122	FireworksDeepseekV3 = Model{
				123	UserName: "fireworks-deepseek-v3",
				124	ModelName: "accounts/fireworks/models/deepseek-v3-0324",
				125	URL: FireworksURL,
				126	Cost: ModelCost{Input: 90, Output: 90}, // not entirely sure about this, they don't list pricing anywhere convenient
				127	APIKeyEnv: FireworksAPIKeyEnv,
				128	}
				129	)
				130
				131	// Service provides chat completions.
				132	// Fields should not be altered concurrently with calling any method on Service.
				133	type Service struct {
				134	HTTPC *http.Client // defaults to http.DefaultClient if nil
				135	APIKey string // optional, if not set will try to load from env var
				136	Model Model // defaults to DefaultModel if zero value
				137	MaxTokens int // defaults to DefaultMaxTokens if zero
				138	Org string // optional - organization ID
				139	}
				140
				141	var _ llm.Service = (*Service)(nil)
				142
				143	// ModelsRegistry is a registry of all known models with their user-friendly names.
				144	var ModelsRegistry = []Model{
				145	GPT41,
				146	Gemini25Flash,
				147	Gemini25Pro,
				148	TogetherDeepseekV3,
				149	TogetherLlama4Maverick,
				150	TogetherLlama3_3_70B,
				151	TogetherMistralSmall,
				152	LlamaCPP,
				153	FireworksDeepseekV3,
				154	}
				155
				156	// ListModels returns a list of all available models with their user-friendly names.
				157	func ListModels() []string {
				158	var names []string
				159	for _, model := range ModelsRegistry {
				160	if model.UserName != "" {
				161	names = append(names, model.UserName)
				162	}
				163	}
				164	return names
				165	}
				166
				167	// ModelByUserName returns a model by its user-friendly name.
				168	// Returns nil if no model with the given name is found.
				169	func ModelByUserName(name string) *Model {
				170	for _, model := range ModelsRegistry {
				171	if model.UserName == name {
				172	return &model
				173	}
				174	}
				175	return nil
				176	}
				177
				178	var (
				179	fromLLMRole = map[llm.MessageRole]string{
				180	llm.MessageRoleAssistant: "assistant",
				181	llm.MessageRoleUser: "user",
				182	}
				183	fromLLMContentType = map[llm.ContentType]string{
				184	llm.ContentTypeText: "text",
				185	llm.ContentTypeToolUse: "function", // OpenAI uses function instead of tool_call
				186	llm.ContentTypeToolResult: "tool_result",
				187	llm.ContentTypeThinking: "text", // Map thinking to text since OpenAI doesn't have thinking
				188	llm.ContentTypeRedactedThinking: "text", // Map redacted_thinking to text
				189	}
				190	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
				191	llm.ToolChoiceTypeAuto: "auto",
				192	llm.ToolChoiceTypeAny: "any",
				193	llm.ToolChoiceTypeNone: "none",
				194	llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
				195	}
				196	toLLMRole = map[string]llm.MessageRole{
				197	"assistant": llm.MessageRoleAssistant,
				198	"user": llm.MessageRoleUser,
				199	}
				200	toLLMStopReason = map[string]llm.StopReason{
				201	"stop": llm.StopReasonStopSequence,
				202	"length": llm.StopReasonMaxTokens,
				203	"tool_calls": llm.StopReasonToolUse,
				204	"function_call": llm.StopReasonToolUse, // Map both to ToolUse
				205	"content_filter": llm.StopReasonStopSequence, // No direct equivalent
				206	}
				207	)
				208
				209	// fromLLMContent converts llm.Content to the format expected by OpenAI.
				210	func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
				211	switch c.Type {
				212	case llm.ContentTypeText:
				213	return c.Text, nil
				214	case llm.ContentTypeToolUse:
				215	// For OpenAI, tool use is sent as a null content with tool_calls in the message
				216	return "", []openai.ToolCall{
				217	{
				218	Type: openai.ToolTypeFunction,
				219	ID: c.ID, // Use the content ID if provided
				220	Function: openai.FunctionCall{
				221	Name: c.ToolName,
				222	Arguments: string(c.ToolInput),
				223	},
				224	},
				225	}
				226	case llm.ContentTypeToolResult:
				227	// Tool results in OpenAI are sent as a separate message with tool_call_id
				228	return c.ToolResult, nil
				229	default:
				230	// For thinking or other types, convert to text
				231	return c.Text, nil
				232	}
				233	}
				234
				235	// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format
				236	func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
				237	// For OpenAI, we need to handle tool results differently than regular messages
				238	// Each tool result becomes its own message with role="tool"
				239
				240	var messages []openai.ChatCompletionMessage
				241
				242	// Check if this is a regular message or contains tool results
				243	var regularContent []llm.Content
				244	var toolResults []llm.Content
				245
				246	for _, c := range msg.Content {
				247	if c.Type == llm.ContentTypeToolResult {
				248	toolResults = append(toolResults, c)
				249	} else {
				250	regularContent = append(regularContent, c)
				251	}
				252	}
				253
				254	// Process tool results as separate messages, but first
				255	for _, tr := range toolResults {
				256	m := openai.ChatCompletionMessage{
				257	Role: "tool",
				258	Content: cmp.Or(tr.ToolResult, " "), // TODO: remove omitempty upstream
				259	ToolCallID: tr.ToolUseID,
				260	}
				261	messages = append(messages, m)
				262	}
				263	// Process regular content second
				264	if len(regularContent) > 0 {
				265	m := openai.ChatCompletionMessage{
				266	Role: fromLLMRole[msg.Role],
				267	}
				268
				269	// For assistant messages that contain tool calls
				270	var toolCalls []openai.ToolCall
				271	var textContent string
				272
				273	for _, c := range regularContent {
				274	content, tools := fromLLMContent(c)
				275	if len(tools) > 0 {
				276	toolCalls = append(toolCalls, tools...)
				277	} else if content != "" {
				278	if textContent != "" {
				279	textContent += "\n"
				280	}
				281	textContent += content
				282	}
				283	}
				284
				285	m.Content = textContent
				286	m.ToolCalls = toolCalls
				287
				288	messages = append(messages, m)
				289	}
				290
				291	return messages
				292	}
				293
				294	// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
				295	func fromLLMToolChoice(tc *llm.ToolChoice) any {
				296	if tc == nil {
				297	return nil
				298	}
				299
				300	if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
				301	return openai.ToolChoice{
				302	Type: openai.ToolTypeFunction,
				303	Function: openai.ToolFunction{
				304	Name: tc.Name,
				305	},
				306	}
				307	}
				308
				309	// For non-specific tool choice, just use the string
				310	return fromLLMToolChoiceType[tc.Type]
				311	}
				312
				313	// fromLLMTool converts llm.Tool to the format expected by OpenAI.
				314	func fromLLMTool(t *llm.Tool) openai.Tool {
				315	return openai.Tool{
				316	Type: openai.ToolTypeFunction,
				317	Function: &openai.FunctionDefinition{
				318	Name: t.Name,
				319	Description: t.Description,
				320	Parameters: t.InputSchema,
				321	},
				322	}
				323	}
				324
				325	// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
				326	func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
				327	if len(systemContent) == 0 {
				328	return nil
				329	}
				330
				331	// Combine all system content into a single system message
				332	var systemText string
				333	for i, content := range systemContent {
				334	if i > 0 && systemText != "" && content.Text != "" {
				335	systemText += "\n"
				336	}
				337	systemText += content.Text
				338	}
				339
				340	if systemText == "" {
				341	return nil
				342	}
				343
				344	return []openai.ChatCompletionMessage{
				345	{
				346	Role: "system",
				347	Content: systemText,
				348	},
				349	}
				350	}
				351
				352	// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
				353	func toRawLLMContent(content string) llm.Content {
				354	return llm.Content{
				355	Type: llm.ContentTypeText,
				356	Text: content,
				357	}
				358	}
				359
				360	// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
				361	func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
				362	// Generate a content ID if needed
				363	id := toolCall.ID
				364	if id == "" {
				365	// Create a deterministic ID based on the function name if no ID is provided
				366	id = "tc_" + toolCall.Function.Name
				367	}
				368
				369	return llm.Content{
				370	ID: id,
				371	Type: llm.ContentTypeToolUse,
				372	ToolName: toolCall.Function.Name,
				373	ToolInput: json.RawMessage(toolCall.Function.Arguments),
				374	}
				375	}
				376
				377	// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
				378	func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
				379	return llm.Content{
				380	Type: llm.ContentTypeToolResult,
				381	ToolUseID: msg.ToolCallID,
				382	ToolResult: msg.Content,
				383	ToolError: false, // OpenAI doesn't specify errors explicitly
				384	}
				385	}
				386
				387	// toLLMContents converts message content from OpenAI to []llm.Content.
				388	func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
				389	var contents []llm.Content
				390
				391	// If this is a tool response, handle it separately
				392	if msg.Role == "tool" && msg.ToolCallID != "" {
				393	return []llm.Content{toToolResultLLMContent(msg)}
				394	}
				395
				396	// If there's text content, add it
				397	if msg.Content != "" {
				398	contents = append(contents, toRawLLMContent(msg.Content))
				399	}
				400
				401	// If there are tool calls, add them
				402	for _, tc := range msg.ToolCalls {
				403	contents = append(contents, toToolCallLLMContent(tc))
				404	}
				405
				406	// If empty, add an empty text content
				407	if len(contents) == 0 {
				408	contents = append(contents, llm.Content{
				409	Type: llm.ContentTypeText,
				410	Text: "",
				411	})
				412	}
				413
				414	return contents
				415	}
				416
				417	// toLLMUsage converts usage information from OpenAI to llm.Usage.
				418	func (s *Service) toLLMUsage(model string, au openai.Usage) llm.Usage {
				419	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
				420	in := uint64(au.PromptTokens)
				421	var inc uint64
				422	if au.PromptTokensDetails != nil {
				423	inc = uint64(au.PromptTokensDetails.CachedTokens)
				424	}
				425	out := uint64(au.CompletionTokens)
				426	u := llm.Usage{
				427	InputTokens: in,
				428	CacheReadInputTokens: inc,
				429	CacheCreationInputTokens: in,
				430	OutputTokens: out,
				431	}
				432	u.CostUSD = s.calculateCostFromTokens(u)
				433	return u
				434	}
				435
				436	// toLLMResponse converts the OpenAI response to llm.Response.
				437	func (s Service) toLLMResponse(r openai.ChatCompletionResponse) *llm.Response {
				438	// fmt.Printf("Raw response\n")
				439	// enc := json.NewEncoder(os.Stdout)
				440	// enc.SetIndent("", " ")
				441	// enc.Encode(r)
				442	// fmt.Printf("\n")
				443
				444	if len(r.Choices) == 0 {
				445	return &llm.Response{
				446	ID: r.ID,
				447	Model: r.Model,
				448	Role: llm.MessageRoleAssistant,
				449	Usage: s.toLLMUsage(r.Model, r.Usage),
				450	}
				451	}
				452
				453	// Process the primary choice
				454	choice := r.Choices[0]
				455
				456	return &llm.Response{
				457	ID: r.ID,
				458	Model: r.Model,
				459	Role: toRoleFromString(choice.Message.Role),
				460	Content: toLLMContents(choice.Message),
				461	StopReason: toStopReason(string(choice.FinishReason)),
				462	Usage: s.toLLMUsage(r.Model, r.Usage),
				463	}
				464	}
				465
				466	// toRoleFromString converts a role string to llm.MessageRole.
				467	func toRoleFromString(role string) llm.MessageRole {
				468	if role == "tool" \|\| role == "system" \|\| role == "function" {
				469	return llm.MessageRoleAssistant // Map special roles to assistant for consistency
				470	}
				471	if mr, ok := toLLMRole[role]; ok {
				472	return mr
				473	}
				474	return llm.MessageRoleUser // Default to user if unknown
				475	}
				476
				477	// toStopReason converts a finish reason string to llm.StopReason.
				478	func toStopReason(reason string) llm.StopReason {
				479	if sr, ok := toLLMStopReason[reason]; ok {
				480	return sr
				481	}
				482	return llm.StopReasonStopSequence // Default
				483	}
				484
				485	// calculateCostFromTokens calculates the cost in dollars for the given model and token counts.
				486	func (s *Service) calculateCostFromTokens(u llm.Usage) float64 {
				487	cost := s.Model.Cost
				488
				489	// TODO: check this for correctness, i am skeptical
				490	// Calculate cost in cents
				491	megaCents := u.CacheCreationInputTokens*cost.Input +
				492	u.CacheReadInputTokens*cost.CachedInput +
				493	u.OutputTokens*cost.Output
				494
				495	cents := float64(megaCents) / 1_000_000
				496	// Convert to dollars
				497	dollars := cents / 100.0
				498	// fmt.Printf("in_new=%d, in_cached=%d, out=%d, cost=%.2f\n", u.CacheCreationInputTokens, u.CacheReadInputTokens, u.OutputTokens, dollars)
				499	return dollars
				500	}
				501
				502	// Do sends a request to OpenAI using the go-openai package.
				503	func (s Service) Do(ctx context.Context, ir llm.Request) (*llm.Response, error) {
				504	// Configure the OpenAI client
				505	httpc := cmp.Or(s.HTTPC, http.DefaultClient)
				506	model := cmp.Or(s.Model, DefaultModel)
				507
				508	// TODO: do this one during Service setup? maybe with a constructor instead?
				509	config := openai.DefaultConfig(s.APIKey)
				510	if model.URL != "" {
				511	config.BaseURL = model.URL
				512	}
				513	if s.Org != "" {
				514	config.OrgID = s.Org
				515	}
				516	config.HTTPClient = httpc
				517
				518	client := openai.NewClientWithConfig(config)
				519
				520	// Start with system messages if provided
				521	var allMessages []openai.ChatCompletionMessage
				522	if len(ir.System) > 0 {
				523	sysMessages := fromLLMSystem(ir.System)
				524	allMessages = append(allMessages, sysMessages...)
				525	}
				526
				527	// Add regular and tool messages
				528	for _, msg := range ir.Messages {
				529	msgs := fromLLMMessage(msg)
				530	allMessages = append(allMessages, msgs...)
				531	}
				532
				533	// Convert tools
				534	var tools []openai.Tool
				535	for _, t := range ir.Tools {
				536	tools = append(tools, fromLLMTool(t))
				537	}
				538
				539	// Create the OpenAI request
				540	req := openai.ChatCompletionRequest{
				541	Model: model.ModelName,
				542	Messages: allMessages,
				543	MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
				544	Tools: tools,
				545	ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
				546	}
				547	// fmt.Printf("Sending request to OpenAI\n")
				548	// enc := json.NewEncoder(os.Stdout)
				549	// enc.SetIndent("", " ")
				550	// enc.Encode(req)
				551	// fmt.Printf("\n")
				552
				553	// Retry mechanism
				554	backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second}
				555
				556	// retry loop
				557	for attempts := 0; ; attempts++ {
				558	resp, err := client.CreateChatCompletion(ctx, req)
				559
				560	// Handle successful response
				561	if err == nil {
				562	return s.toLLMResponse(&resp), nil
				563	}
				564
				565	// Handle errors
				566	var apiErr *openai.APIError
				567	if ok := errors.As(err, &apiErr); !ok {
				568	// Not an OpenAI API error, return immediately
				569	return nil, err
				570	}
				571
				572	switch {
				573	case apiErr.HTTPStatusCode >= 500:
				574	// Server error, try again with backoff
				575	sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
				576	slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "sleep", sleep)
				577	time.Sleep(sleep)
				578	continue
				579
				580	case apiErr.HTTPStatusCode == 429:
				581	// Rate limited, back off longer
				582	sleep := 20*time.Second + backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
				583	slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "sleep", sleep)
				584	time.Sleep(sleep)
				585	continue
				586
				587	default:
				588	// Other error, return immediately
				589	return nil, fmt.Errorf("OpenAI API error: %w", err)
				590	}
				591	}
				592	}