blob: e5cbcf014cf67d0531082f434e990c7ac3c0bd28 [file] [log] [blame]
David Crawshaw5a234062025-05-04 17:52:08 +00001package gem
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "math/rand"
10 "net/http"
11 "strings"
12 "time"
13
14 "sketch.dev/llm"
15 "sketch.dev/llm/gem/gemini"
16)
17
const (
	// DefaultModel is the Gemini model used when Service.Model is empty.
	DefaultModel = "gemini-2.5-pro-preview-03-25"
	// GeminiAPIKeyEnv is the name of the environment variable that
	// conventionally holds the Gemini API key.
	GeminiAPIKeyEnv = "GEMINI_API_KEY"
)
22
// Service provides Gemini completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC  *http.Client // HTTP client to use; defaults to http.DefaultClient if nil
	URL    string       // Gemini API URL; uses the gemini package default if empty
	APIKey string       // API key for authentication; must be non-empty
	Model  string       // model name; defaults to DefaultModel if empty
}
31
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)

// fromLLMRole maps Sketch's llm message roles to the role strings the
// Gemini API expects: "model" for assistant output, "user" for input.
var fromLLMRole = map[llm.MessageRole]string{
	llm.MessageRoleAssistant: "model",
	llm.MessageRoleUser:      "user",
}
39
40// convertToolSchemas converts Sketch's llm.Tool schemas to Gemini's schema format
41func convertToolSchemas(tools []*llm.Tool) ([]gemini.FunctionDeclaration, error) {
42 if len(tools) == 0 {
43 return nil, nil
44 }
45
46 var decls []gemini.FunctionDeclaration
47 for _, tool := range tools {
48 // Parse the schema from raw JSON
49 var schemaJSON map[string]any
50 if err := json.Unmarshal(tool.InputSchema, &schemaJSON); err != nil {
51 return nil, fmt.Errorf("failed to unmarshal tool %s schema: %w", tool.Name, err)
52 }
53 decls = append(decls, gemini.FunctionDeclaration{
54 Name: tool.Name,
55 Description: tool.Description,
56 Parameters: convertJSONSchemaToGeminiSchema(schemaJSON),
57 })
58 }
59
60 return decls, nil
61}
62
63// convertJSONSchemaToGeminiSchema converts a JSON schema to Gemini's schema format
64func convertJSONSchemaToGeminiSchema(schemaJSON map[string]any) gemini.Schema {
65 schema := gemini.Schema{}
66
67 // Set the type based on the JSON schema type
68 if typeVal, ok := schemaJSON["type"].(string); ok {
69 switch typeVal {
70 case "string":
71 schema.Type = gemini.DataTypeSTRING
72 case "number":
73 schema.Type = gemini.DataTypeNUMBER
74 case "integer":
75 schema.Type = gemini.DataTypeINTEGER
76 case "boolean":
77 schema.Type = gemini.DataTypeBOOLEAN
78 case "array":
79 schema.Type = gemini.DataTypeARRAY
80 case "object":
81 schema.Type = gemini.DataTypeOBJECT
82 default:
83 schema.Type = gemini.DataTypeSTRING // Default to string for unknown types
84 }
85 }
86
87 // Set description if available
88 if desc, ok := schemaJSON["description"].(string); ok {
89 schema.Description = desc
90 }
91
92 // Handle enum values
93 if enumValues, ok := schemaJSON["enum"].([]any); ok {
94 schema.Enum = make([]string, len(enumValues))
95 for i, v := range enumValues {
96 if strVal, ok := v.(string); ok {
97 schema.Enum[i] = strVal
98 } else {
99 // Convert non-string values to string
100 valBytes, _ := json.Marshal(v)
101 schema.Enum[i] = string(valBytes)
102 }
103 }
104 }
105
106 // Handle object properties
107 if properties, ok := schemaJSON["properties"].(map[string]any); ok && schema.Type == gemini.DataTypeOBJECT {
108 schema.Properties = make(map[string]gemini.Schema)
109 for propName, propSchema := range properties {
110 if propSchemaMap, ok := propSchema.(map[string]any); ok {
111 schema.Properties[propName] = convertJSONSchemaToGeminiSchema(propSchemaMap)
112 }
113 }
114 }
115
116 // Handle required properties
117 if required, ok := schemaJSON["required"].([]any); ok {
118 schema.Required = make([]string, len(required))
119 for i, r := range required {
120 if strVal, ok := r.(string); ok {
121 schema.Required[i] = strVal
122 }
123 }
124 }
125
126 // Handle array items
127 if items, ok := schemaJSON["items"].(map[string]any); ok && schema.Type == gemini.DataTypeARRAY {
128 itemSchema := convertJSONSchemaToGeminiSchema(items)
129 schema.Items = &itemSchema
130 }
131
132 // Handle minimum/maximum items for arrays
133 if minItems, ok := schemaJSON["minItems"].(float64); ok {
134 schema.MinItems = fmt.Sprintf("%d", int(minItems))
135 }
136 if maxItems, ok := schemaJSON["maxItems"].(float64); ok {
137 schema.MaxItems = fmt.Sprintf("%d", int(maxItems))
138 }
139
140 return schema
141}
142
143// buildGeminiRequest converts Sketch's llm.Request to Gemini's request format
144func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error) {
145 gemReq := &gemini.Request{}
146
147 // Add system instruction if provided
148 if len(req.System) > 0 {
149 // Combine all system messages into a single system instruction
150 systemText := ""
151 for i, sys := range req.System {
152 if i > 0 && systemText != "" && sys.Text != "" {
153 systemText += "\n"
154 }
155 systemText += sys.Text
156 }
157
158 if systemText != "" {
159 gemReq.SystemInstruction = &gemini.Content{
160 Parts: []gemini.Part{{Text: systemText}},
161 }
162 }
163 }
164
165 // Convert messages to Gemini content format
166 for _, msg := range req.Messages {
167 // Set the role based on the message role
168 role, ok := fromLLMRole[msg.Role]
169 if !ok {
170 return nil, fmt.Errorf("unsupported message role: %v", msg.Role)
171 }
172
173 content := gemini.Content{
174 Role: role,
175 }
176
177 // Store tool usage information to correlate tool uses with responses
178 toolNameToID := make(map[string]string)
179
180 // First pass: collect tool use IDs for correlation
181 for _, c := range msg.Content {
182 if c.Type == llm.ContentTypeToolUse && c.ID != "" {
183 toolNameToID[c.ToolName] = c.ID
184 }
185 }
186
187 // Map each content item to Gemini's format
188 for _, c := range msg.Content {
189 switch c.Type {
190 case llm.ContentTypeText, llm.ContentTypeThinking, llm.ContentTypeRedactedThinking:
191 // Simple text content
192 content.Parts = append(content.Parts, gemini.Part{
193 Text: c.Text,
194 })
195 case llm.ContentTypeToolUse:
196 // Tool use becomes a function call
197 var args map[string]any
198 if err := json.Unmarshal(c.ToolInput, &args); err != nil {
199 return nil, fmt.Errorf("failed to unmarshal tool input: %w", err)
200 }
201
202 // Make sure we have a valid ID for this tool use
203 if c.ID == "" {
204 c.ID = fmt.Sprintf("gemini_tool_%s_%d", c.ToolName, time.Now().UnixNano())
205 }
206
207 // Save the ID for this tool name for future correlation
208 toolNameToID[c.ToolName] = c.ID
209
210 slog.DebugContext(context.Background(), "gemini_preparing_tool_use",
211 "tool_name", c.ToolName,
212 "tool_id", c.ID,
213 "input", string(c.ToolInput))
214
215 content.Parts = append(content.Parts, gemini.Part{
216 FunctionCall: &gemini.FunctionCall{
217 Name: c.ToolName,
218 Args: args,
219 },
220 })
221 case llm.ContentTypeToolResult:
222 // Tool result becomes a function response
223 // Create a map for the response
224 response := map[string]any{
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700225 "error": c.ToolError,
David Crawshaw5a234062025-05-04 17:52:08 +0000226 }
227
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700228 // Handle tool results: Gemini only supports string results
229 // Combine all text content into a single string
230 var resultText string
231 if len(c.ToolResult) > 0 {
232 // Collect all text from content objects
233 texts := make([]string, 0, len(c.ToolResult))
234 for _, result := range c.ToolResult {
235 if result.Text != "" {
236 texts = append(texts, result.Text)
237 }
238 }
239 resultText = strings.Join(texts, "\n")
240 }
241 response["result"] = resultText
242
David Crawshaw5a234062025-05-04 17:52:08 +0000243 // Determine the function name to use - this is critical
244 funcName := ""
245
246 // First try to find the function name from a stored toolUseID if we have one
247 if c.ToolUseID != "" {
248 // Try to derive the tool name from the previous tools we've seen
249 for name, id := range toolNameToID {
250 if id == c.ToolUseID {
251 funcName = name
252 break
253 }
254 }
255 }
256
257 // Fallback options if we couldn't find the tool name
258 if funcName == "" {
259 // Try the tool name directly
260 if c.ToolName != "" {
261 funcName = c.ToolName
262 } else {
263 // Last resort fallback
264 funcName = "default_tool"
265 }
266 }
267
268 slog.DebugContext(context.Background(), "gemini_preparing_tool_result",
269 "tool_use_id", c.ToolUseID,
270 "mapped_func_name", funcName,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700271 "result_count", len(c.ToolResult))
David Crawshaw5a234062025-05-04 17:52:08 +0000272
273 content.Parts = append(content.Parts, gemini.Part{
274 FunctionResponse: &gemini.FunctionResponse{
275 Name: funcName,
276 Response: response,
277 },
278 })
279 }
280 }
281
282 gemReq.Contents = append(gemReq.Contents, content)
283 }
284
285 // Handle tools/functions
286 if len(req.Tools) > 0 {
287 // Convert tool schemas
288 decls, err := convertToolSchemas(req.Tools)
289 if err != nil {
290 return nil, fmt.Errorf("failed to convert tool schemas: %w", err)
291 }
292 if len(decls) > 0 {
293 gemReq.Tools = []gemini.Tool{{FunctionDeclarations: decls}}
294 }
295 }
296
297 return gemReq, nil
298}
299
300// convertGeminiResponsesToContent converts a Gemini response to llm.Content
301func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
302 if res == nil || len(res.Candidates) == 0 || len(res.Candidates[0].Content.Parts) == 0 {
303 return []llm.Content{{
304 Type: llm.ContentTypeText,
305 Text: "",
306 }}
307 }
308
309 var contents []llm.Content
310
311 // Process each part in the first candidate's content
312 for i, part := range res.Candidates[0].Content.Parts {
313 // Log the part type for debugging
314 slog.DebugContext(context.Background(), "processing_gemini_part",
315 "index", i,
316 "has_text", part.Text != "",
317 "has_function_call", part.FunctionCall != nil,
318 "has_function_response", part.FunctionResponse != nil)
319
320 if part.Text != "" {
321 // Simple text response
322 contents = append(contents, llm.Content{
323 Type: llm.ContentTypeText,
324 Text: part.Text,
325 })
326 } else if part.FunctionCall != nil {
327 // Function call (tool use)
328 args, err := json.Marshal(part.FunctionCall.Args)
329 if err != nil {
330 // If we can't marshal, use empty args
331 slog.DebugContext(context.Background(), "gemini_failed_to_markshal_args",
332 "tool_name", part.FunctionCall.Name,
333 "args", string(args),
334 "err", err.Error(),
335 )
336 args = []byte("{}")
337 }
338
339 // Generate a unique ID for this tool use that includes the function name
340 // to make it easier to correlate with responses
341 toolID := fmt.Sprintf("gemini_tool_%s_%d", part.FunctionCall.Name, time.Now().UnixNano())
342
343 contents = append(contents, llm.Content{
344 ID: toolID,
345 Type: llm.ContentTypeToolUse,
346 ToolName: part.FunctionCall.Name,
347 ToolInput: json.RawMessage(args),
348 })
349
350 slog.DebugContext(context.Background(), "gemini_tool_call",
351 "tool_id", toolID,
352 "tool_name", part.FunctionCall.Name,
353 "args", string(args))
354 } else if part.FunctionResponse != nil {
355 // We shouldn't normally get function responses from the model, but just in case
356 respData, _ := json.Marshal(part.FunctionResponse.Response)
357 slog.DebugContext(context.Background(), "unexpected_function_response",
358 "name", part.FunctionResponse.Name,
359 "response", string(respData))
360 }
361 }
362
363 // If no content was added, add an empty text content
364 if len(contents) == 0 {
365 slog.DebugContext(context.Background(), "empty_gemini_response", "adding_empty_text", true)
366 contents = append(contents, llm.Content{
367 Type: llm.ContentTypeText,
368 Text: "",
369 })
370 }
371
372 return contents
373}
374
375// Gemini doesn't provide usage info directly, so we need to estimate it
376// ensureToolIDs makes sure all tool uses have proper IDs
377func ensureToolIDs(contents []llm.Content) {
378 for i, content := range contents {
379 if content.Type == llm.ContentTypeToolUse && content.ID == "" {
380 // Generate a stable ID using the tool name and timestamp
381 contents[i].ID = fmt.Sprintf("gemini_tool_%s_%d", content.ToolName, time.Now().UnixNano())
382 slog.DebugContext(context.Background(), "assigned_missing_tool_id",
383 "tool_name", content.ToolName,
384 "new_id", contents[i].ID)
385 }
386 }
387}
388
389func calculateUsage(req *gemini.Request, res *gemini.Response) llm.Usage {
390 // Very rough estimation of token counts
391 var inputTokens uint64
392 var outputTokens uint64
393
394 // Count system tokens
395 if req.SystemInstruction != nil {
396 for _, part := range req.SystemInstruction.Parts {
397 if part.Text != "" {
398 // Very rough estimation: 1 token per 4 characters
399 inputTokens += uint64(len(part.Text)) / 4
400 }
401 }
402 }
403
404 // Count input tokens
405 for _, content := range req.Contents {
406 for _, part := range content.Parts {
407 if part.Text != "" {
408 inputTokens += uint64(len(part.Text)) / 4
409 } else if part.FunctionCall != nil {
410 // Estimate function call tokens
411 argBytes, _ := json.Marshal(part.FunctionCall.Args)
412 inputTokens += uint64(len(part.FunctionCall.Name)+len(argBytes)) / 4
413 } else if part.FunctionResponse != nil {
414 // Estimate function response tokens
415 resBytes, _ := json.Marshal(part.FunctionResponse.Response)
416 inputTokens += uint64(len(part.FunctionResponse.Name)+len(resBytes)) / 4
417 }
418 }
419 }
420
421 // Count output tokens
422 if res != nil && len(res.Candidates) > 0 {
423 for _, part := range res.Candidates[0].Content.Parts {
424 if part.Text != "" {
425 outputTokens += uint64(len(part.Text)) / 4
426 } else if part.FunctionCall != nil {
427 // Estimate function call tokens
428 argBytes, _ := json.Marshal(part.FunctionCall.Args)
429 outputTokens += uint64(len(part.FunctionCall.Name)+len(argBytes)) / 4
430 }
431 }
432 }
433
434 // For Gemini 2.5 Pro Preview pricing: $1.25 per 1M input tokens, $10 per 1M output tokens
435 // Convert to dollars
436 costUSD := float64(inputTokens)*1.25/1_000_000.0 + float64(outputTokens)*10/1_000_000.0
437
438 return llm.Usage{
439 InputTokens: inputTokens,
440 OutputTokens: outputTokens,
441 CostUSD: costUSD,
442 }
443}
444
445// Do sends a request to Gemini.
446func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
447 // Log the incoming request for debugging
448 slog.DebugContext(ctx, "gemini_request",
449 "message_count", len(ir.Messages),
450 "tool_count", len(ir.Tools),
451 "system_count", len(ir.System))
452
453 // Log tool-related information if any tools are present
454 if len(ir.Tools) > 0 {
455 var toolNames []string
456 for _, tool := range ir.Tools {
457 toolNames = append(toolNames, tool.Name)
458 }
459 slog.DebugContext(ctx, "gemini_tools", "tools", toolNames)
460 }
461
462 // Log details about the messages being sent
463 for i, msg := range ir.Messages {
464 contentTypes := make([]string, len(msg.Content))
465 for j, c := range msg.Content {
466 contentTypes[j] = c.Type.String()
467
468 // Log tool-related content with more details
469 if c.Type == llm.ContentTypeToolUse {
470 slog.DebugContext(ctx, "gemini_tool_use",
471 "message_idx", i,
472 "content_idx", j,
473 "tool_name", c.ToolName,
474 "tool_input", string(c.ToolInput))
475 } else if c.Type == llm.ContentTypeToolResult {
476 slog.DebugContext(ctx, "gemini_tool_result",
477 "message_idx", i,
478 "content_idx", j,
479 "tool_use_id", c.ToolUseID,
480 "tool_error", c.ToolError,
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700481 "result_count", len(c.ToolResult))
David Crawshaw5a234062025-05-04 17:52:08 +0000482 }
483 }
484 slog.DebugContext(ctx, "gemini_message",
485 "idx", i,
486 "role", msg.Role.String(),
487 "content_types", contentTypes)
488 }
489 // Build the Gemini request
490 gemReq, err := s.buildGeminiRequest(ir)
491 if err != nil {
492 return nil, fmt.Errorf("failed to build Gemini request: %w", err)
493 }
494
495 // Log the structured Gemini request for debugging
496 if reqJSON, err := json.MarshalIndent(gemReq, "", " "); err == nil {
497 slog.DebugContext(ctx, "gemini_request_json", "request", string(reqJSON))
498 }
499
500 // Create a Gemini model instance
501 model := gemini.Model{
David Crawshaw3659d872025-05-05 17:52:23 -0700502 Model: "models/" + cmp.Or(s.Model, DefaultModel),
503 Endpoint: s.URL,
504 APIKey: s.APIKey,
505 HTTPC: cmp.Or(s.HTTPC, http.DefaultClient),
David Crawshaw5a234062025-05-04 17:52:08 +0000506 }
507
508 // Send the request to Gemini with retry logic
509 startTime := time.Now()
510 endTime := startTime // Initialize endTime
511 var gemRes *gemini.Response
512
513 // Retry mechanism for handling server errors and rate limiting
514 backoff := []time.Duration{1 * time.Second, 3 * time.Second, 5 * time.Second, 10 * time.Second}
515 for attempts := 0; attempts <= len(backoff); attempts++ {
516 gemApiErr := error(nil)
517 gemRes, gemApiErr = model.GenerateContent(ctx, gemReq)
518 endTime = time.Now()
519
520 if gemApiErr == nil {
521 // Successful response
522 // Log the structured Gemini response
523 if resJSON, err := json.MarshalIndent(gemRes, "", " "); err == nil {
524 slog.DebugContext(ctx, "gemini_response_json", "response", string(resJSON))
525 }
526 break
527 }
528
529 if attempts == len(backoff) {
530 // We've exhausted all retry attempts
531 return nil, fmt.Errorf("gemini: API error after %d attempts: %w", attempts, gemApiErr)
532 }
533
534 // Check if the error is retryable (e.g., server error or rate limiting)
535 if strings.Contains(gemApiErr.Error(), "429") || strings.Contains(gemApiErr.Error(), "5") {
536 // Rate limited or server error - wait and retry
537 random := time.Duration(rand.Int63n(int64(time.Second)))
538 sleep := backoff[attempts] + random
539 slog.WarnContext(ctx, "gemini_request_retry", "error", gemApiErr.Error(), "attempt", attempts+1, "sleep", sleep)
540 time.Sleep(sleep)
541 continue
542 }
543
544 // Non-retryable error
545 return nil, fmt.Errorf("gemini: API error: %w", gemApiErr)
546 }
547
548 content := convertGeminiResponseToContent(gemRes)
549
550 ensureToolIDs(content)
551
552 usage := calculateUsage(gemReq, gemRes)
553
554 stopReason := llm.StopReasonEndTurn
555 for _, part := range content {
556 if part.Type == llm.ContentTypeToolUse {
557 stopReason = llm.StopReasonToolUse
558 slog.DebugContext(ctx, "gemini_tool_use_detected",
559 "setting_stop_reason", "llm.StopReasonToolUse",
560 "tool_name", part.ToolName)
561 break
562 }
563 }
564
565 return &llm.Response{
566 Role: llm.MessageRoleAssistant,
567 Model: s.Model,
568 Content: content,
569 StopReason: stopReason,
570 Usage: usage,
571 StartTime: &startTime,
572 EndTime: &endTime,
573 }, nil
574}