Blame - server/llm/llm.go - staff

blob: aa63c3e3dc9d4b8b50539fc3880e9c7e4bab420a [file] [log] [blame]

iomodo	a97eb22	2025-07-26 11:18:17 +0400	[diff] [blame]	1	package llm
				2
				3	import (
				4	"context"
				5	"time"
				6	)
				7
				8	// LLMProvider defines the interface that all LLM providers must implement
				9	type LLMProvider interface {
				10	// ChatCompletion creates a chat completion
				11	ChatCompletion(ctx context.Context, req ChatCompletionRequest) (*ChatCompletionResponse, error)
				12
				13	// CreateEmbeddings generates embeddings for the given input
				14	CreateEmbeddings(ctx context.Context, req EmbeddingRequest) (*EmbeddingResponse, error)
				15
				16	// Close performs any necessary cleanup
				17	Close() error
				18	}
				19
				20	// ProviderFactory creates LLM provider instances
				21	type ProviderFactory interface {
				22	// CreateProvider creates a new LLM provider instance
				23	CreateProvider(config Config) (LLMProvider, error)
				24
				25	// SupportsProvider checks if the factory supports the given provider
				26	SupportsProvider(provider Provider) bool
				27	}
				28
				29	// Provider represents different LLM service providers
				30	type Provider string
				31
				32	const (
				33	ProviderOpenAI Provider = "openai"
				34	ProviderXAI Provider = "xai"
				35	ProviderClaude Provider = "claude"
				36	ProviderGemini Provider = "gemini"
				37	ProviderLocal Provider = "local"
iomodo	f1ddefe	2025-07-28 09:02:05 +0400	[diff] [blame]	38	ProviderFake Provider = "fake"
iomodo	a97eb22	2025-07-26 11:18:17 +0400	[diff] [blame]	39	)
				40
				41	// Role represents the role of a message participant
				42	type Role string
				43
				44	const (
				45	RoleSystem Role = "system"
				46	RoleUser Role = "user"
				47	RoleAssistant Role = "assistant"
				48	RoleTool Role = "tool"
				49	)
				50
				51	// Message represents a single message in a conversation
				52	type Message struct {
				53	Role Role `json:"role"`
				54	Content string `json:"content"`
				55	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
				56	ToolCallID string `json:"tool_call_id,omitempty"`
				57	Name string `json:"name,omitempty"`
				58	}
				59
				60	// ToolCall represents a function/tool call request
				61	type ToolCall struct {
				62	ID string `json:"id"`
				63	Type string `json:"type"`
				64	Function Function `json:"function"`
				65	}
				66
				67	// Function represents a function definition
				68	type Function struct {
				69	Name string `json:"name"`
				70	Description string `json:"description,omitempty"`
				71	Parameters map[string]interface{} `json:"parameters,omitempty"`
				72	}
				73
				74	// Tool represents a tool that can be called by the model
				75	type Tool struct {
				76	Type string `json:"type"`
				77	Function Function `json:"function"`
				78	}
				79
				80	// ChatCompletionRequest represents a request to complete a chat conversation
				81	type ChatCompletionRequest struct {
				82	Model string `json:"model"`
				83	Messages []Message `json:"messages"`
				84	MaxTokens *int `json:"max_tokens,omitempty"`
				85	Temperature *float64 `json:"temperature,omitempty"`
				86	TopP *float64 `json:"top_p,omitempty"`
				87	N *int `json:"n,omitempty"`
				88	Stream *bool `json:"stream,omitempty"`
				89	Stop []string `json:"stop,omitempty"`
				90	PresencePenalty *float64 `json:"presence_penalty,omitempty"`
				91	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
				92	LogitBias map[string]int `json:"logit_bias,omitempty"`
				93	User string `json:"user,omitempty"`
				94	Tools []Tool `json:"tools,omitempty"`
				95	ToolChoice interface{} `json:"tool_choice,omitempty"`
				96	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
				97	Seed *int64 `json:"seed,omitempty"`
				98	ExtraParams map[string]interface{} `json:"-"` // For provider-specific parameters
				99	}
				100
				101	// ResponseFormat specifies the format of the response
				102	type ResponseFormat struct {
				103	Type string `json:"type"` // "text" or "json_object"
				104	}
				105
				106	// ChatCompletionResponse represents a response from a chat completion request
				107	type ChatCompletionResponse struct {
				108	ID string `json:"id"`
				109	Object string `json:"object"`
				110	Created int64 `json:"created"`
				111	Model string `json:"model"`
				112	SystemFingerprint string `json:"system_fingerprint,omitempty"`
				113	Choices []ChatCompletionChoice `json:"choices"`
				114	Usage Usage `json:"usage"`
				115	Provider Provider `json:"provider"`
				116	}
				117
				118	// ChatCompletionChoice represents a single choice in a chat completion response
				119	type ChatCompletionChoice struct {
				120	Index int `json:"index"`
				121	Message Message `json:"message"`
				122	Logprobs *Logprobs `json:"logprobs,omitempty"`
				123	FinishReason string `json:"finish_reason"`
				124	Delta *Message `json:"delta,omitempty"` // For streaming
				125	ExtraData map[string]interface{} `json:"-"` // For provider-specific data
				126	}
				127
				128	// Logprobs represents log probability information
				129	type Logprobs struct {
				130	Content []LogprobContent `json:"content,omitempty"`
				131	}
				132
				133	// LogprobContent represents content with log probabilities
				134	type LogprobContent struct {
				135	Token string `json:"token"`
				136	Logprob float64 `json:"logprob"`
				137	Bytes []int `json:"bytes,omitempty"`
				138	TopLogprobs []TopLogprob `json:"top_logprobs,omitempty"`
				139	}
				140
				141	// TopLogprob represents a top log probability
				142	type TopLogprob struct {
				143	Token string `json:"token"`
				144	Logprob float64 `json:"logprob"`
				145	Bytes []int `json:"bytes,omitempty"`
				146	}
				147
				148	// Usage represents token usage information
				149	type Usage struct {
				150	PromptTokens int `json:"prompt_tokens"`
				151	CompletionTokens int `json:"completion_tokens"`
				152	TotalTokens int `json:"total_tokens"`
				153	}
				154
				155	// EmbeddingRequest represents a request to generate embeddings
				156	type EmbeddingRequest struct {
				157	Input interface{} `json:"input"` // string, []string, or []int
				158	Model string `json:"model"`
				159	EncodingFormat string `json:"encoding_format,omitempty"`
				160	Dimensions *int `json:"dimensions,omitempty"`
				161	User string `json:"user,omitempty"`
				162	ExtraParams map[string]interface{} `json:"-"` // For provider-specific parameters
				163	}
				164
				165	// EmbeddingResponse represents a response from an embedding request
				166	type EmbeddingResponse struct {
				167	Object string `json:"object"`
				168	Data []Embedding `json:"data"`
				169	Usage Usage `json:"usage"`
				170	Model string `json:"model"`
				171	Provider Provider `json:"provider"`
				172	}
				173
				174	// Embedding represents a single embedding
				175	type Embedding struct {
				176	Object string `json:"object"`
				177	Embedding []float64 `json:"embedding"`
				178	Index int `json:"index"`
				179	}
				180
				181	// ModelInfo represents information about an available model
				182	type ModelInfo struct {
				183	ID string `json:"id"`
				184	Object string `json:"object"`
				185	Created int64 `json:"created"`
				186	OwnedBy string `json:"owned_by"`
				187	Permission []ModelPermission `json:"permission"`
				188	Root string `json:"root"`
				189	Parent string `json:"parent"`
				190	Provider Provider `json:"provider"`
				191	ExtraData map[string]interface{} `json:"-"` // For provider-specific data
				192	}
				193
				194	// ModelPermission represents permissions for a model
				195	type ModelPermission struct {
				196	ID string `json:"id"`
				197	Object string `json:"object"`
				198	Created int64 `json:"created"`
				199	AllowCreateEngine bool `json:"allow_create_engine"`
				200	AllowSampling bool `json:"allow_sampling"`
				201	AllowLogprobs bool `json:"allow_logprobs"`
				202	AllowSearchIndices bool `json:"allow_search_indices"`
				203	AllowView bool `json:"allow_view"`
				204	AllowFineTuning bool `json:"allow_fine_tuning"`
				205	Organization string `json:"organization"`
				206	Group string `json:"group"`
				207	IsBlocking bool `json:"is_blocking"`
				208	}
				209
				210	// Error represents an error response from an LLM provider
				211	type Error struct {
				212	Error struct {
				213	Message string `json:"message"`
				214	Type string `json:"type"`
				215	Code string `json:"code,omitempty"`
				216	Param string `json:"param,omitempty"`
				217	} `json:"error"`
				218	}
				219
				220	// Config represents configuration for an LLM provider
				221	type Config struct {
				222	Provider Provider `json:"provider"`
				223	APIKey string `json:"api_key"`
				224	BaseURL string `json:"base_url,omitempty"`
				225	Timeout time.Duration `json:"timeout,omitempty"`
				226	MaxRetries int `json:"max_retries,omitempty"`
				227	ExtraConfig map[string]interface{} `json:"extra_config,omitempty"`
				228	}
				229
				230	// StreamResponse represents a streaming response chunk
				231	type StreamResponse struct {
				232	ID string `json:"id"`
				233	Object string `json:"object"`
				234	Created int64 `json:"created"`
				235	Model string `json:"model"`
				236	SystemFingerprint string `json:"system_fingerprint,omitempty"`
				237	Choices []ChatCompletionChoice `json:"choices"`
				238	Usage *Usage `json:"usage,omitempty"`
				239	Provider Provider `json:"provider"`
				240	}
				241
				242	// DefaultConfigs provides default configurations for different providers
				243	var DefaultConfigs = map[Provider]Config{
				244	ProviderOpenAI: {
				245	Provider: ProviderOpenAI,
				246	BaseURL: "https://api.openai.com/v1",
				247	Timeout: 30 * time.Second,
				248	MaxRetries: 3,
				249	},
				250	ProviderXAI: {
				251	Provider: ProviderXAI,
				252	BaseURL: "https://api.x.ai/v1",
				253	Timeout: 30 * time.Second,
				254	MaxRetries: 3,
				255	},
				256	ProviderClaude: {
				257	Provider: ProviderClaude,
				258	BaseURL: "https://api.anthropic.com/v1",
				259	Timeout: 30 * time.Second,
				260	MaxRetries: 3,
				261	},
				262	ProviderGemini: {
				263	Provider: ProviderGemini,
				264	BaseURL: "https://generativelanguage.googleapis.com/v1",
				265	Timeout: 30 * time.Second,
				266	MaxRetries: 3,
				267	},
				268	ProviderLocal: {
				269	Provider: ProviderLocal,
				270	BaseURL: "http://localhost:11434",
				271	Timeout: 60 * time.Second,
				272	MaxRetries: 1,
				273	},
iomodo	f1ddefe	2025-07-28 09:02:05 +0400	[diff] [blame]	274	ProviderFake: {
				275	Provider: ProviderFake,
				276	BaseURL: "fake://test",
				277	Timeout: 1 * time.Second,
				278	MaxRetries: 0,
				279	},
iomodo	a97eb22	2025-07-26 11:18:17 +0400	[diff] [blame]	280	}