blob: 124f2fcc7cd775ac0bae23c9a13ae5f2317b6a3f [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
Josh Bleecher Snyder4d5e9972025-05-01 15:56:37 -07004 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07005 "context"
Sean McCullough9f4b8082025-04-30 17:34:07 +00006 "fmt"
Earl Lee2e463fb2025-04-17 11:22:22 -07007 "net/http"
8 "os"
Sean McCullough96b60dd2025-04-30 09:49:10 -07009 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -070010 "strings"
11 "testing"
12 "time"
13
Earl Lee2e463fb2025-04-17 11:22:22 -070014 "sketch.dev/httprr"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070015 "sketch.dev/llm"
16 "sketch.dev/llm/ant"
17 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070018)
19
20// TestAgentLoop tests that the Agent loop functionality works correctly.
21// It uses the httprr package to record HTTP interactions for replay in tests.
22// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .*agent_loop.*"
23// as necessary.
24func TestAgentLoop(t *testing.T) {
25 ctx := context.Background()
26
27 // Setup httprr recorder
28 rrPath := "testdata/agent_loop.httprr"
29 rr, err := httprr.Open(rrPath, http.DefaultTransport)
30 if err != nil && !os.IsNotExist(err) {
31 t.Fatal(err)
32 }
33
34 if rr.Recording() {
35 // Skip the test if API key is not available
36 if os.Getenv("ANTHROPIC_API_KEY") == "" {
37 t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
38 }
39 }
40
41 // Create HTTP client
42 var client *http.Client
43 if rr != nil {
44 // Scrub API keys from requests for security
45 rr.ScrubReq(func(req *http.Request) error {
46 req.Header.Del("x-api-key")
47 req.Header.Del("anthropic-api-key")
48 return nil
49 })
50 client = rr.Client()
51 } else {
52 client = &http.Client{Transport: http.DefaultTransport}
53 }
54
55 // Create a new agent with the httprr client
56 origWD, err := os.Getwd()
57 if err != nil {
58 t.Fatal(err)
59 }
60 if err := os.Chdir("/"); err != nil {
61 t.Fatal(err)
62 }
Philip Zeyligere6c294d2025-06-04 16:55:21 +000063 budget := conversation.Budget{MaxDollars: 10.0}
Earl Lee2e463fb2025-04-17 11:22:22 -070064 wd, err := os.Getwd()
65 if err != nil {
66 t.Fatal(err)
67 }
68
David Crawshaw3659d872025-05-05 17:52:23 -070069 apiKey := cmp.Or(os.Getenv("OUTER_SKETCH_MODEL_API_KEY"), os.Getenv("ANTHROPIC_API_KEY"))
Earl Lee2e463fb2025-04-17 11:22:22 -070070 cfg := AgentConfig{
Philip Zeyligerbc8c8dc2025-05-21 13:19:13 -070071 Context: ctx,
72 WorkingDir: wd,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070073 Service: &ant.Service{
74 APIKey: apiKey,
75 HTTPC: client,
76 },
Earl Lee2e463fb2025-04-17 11:22:22 -070077 Budget: budget,
78 GitUsername: "Test Agent",
79 GitEmail: "totallyhuman@sketch.dev",
80 SessionID: "test-session-id",
81 ClientGOOS: "linux",
82 ClientGOARCH: "amd64",
83 }
84 agent := NewAgent(cfg)
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +000085
86 // Use fixed time for deterministic tests
87 fixedTime := time.Date(2025, 7, 25, 19, 37, 57, 0, time.UTC)
88 agent.now = func() time.Time { return fixedTime }
89
Earl Lee2e463fb2025-04-17 11:22:22 -070090 if err := os.Chdir(origWD); err != nil {
91 t.Fatal(err)
92 }
Philip Zeyligerbc8c8dc2025-05-21 13:19:13 -070093 err = agent.Init(AgentInit{NoGit: true})
Earl Lee2e463fb2025-04-17 11:22:22 -070094 if err != nil {
95 t.Fatal(err)
96 }
97
98 // Setup a test message that will trigger a simple, predictable response
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +000099 userMessage := "What tools are available to you? Please just list them briefly."
100
101 // Set a slug so that the agent doesn't have to.
102 agent.SetSlug("list-available-tools")
Earl Lee2e463fb2025-04-17 11:22:22 -0700103
104 // Send the message to the agent
105 agent.UserMessage(ctx, userMessage)
106
107 // Process a single loop iteration to avoid long-running tests
Sean McCullough885a16a2025-04-30 02:49:25 +0000108 agent.processTurn(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700109
110 // Collect responses with a timeout
111 var responses []AgentMessage
Philip Zeyliger9373c072025-05-01 10:27:01 -0700112 ctx2, cancel := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
113 defer cancel()
Earl Lee2e463fb2025-04-17 11:22:22 -0700114 done := false
Philip Zeyligerb7c58752025-05-01 10:10:17 -0700115 it := agent.NewIterator(ctx2, 0)
Earl Lee2e463fb2025-04-17 11:22:22 -0700116
117 for !done {
Philip Zeyligerb7c58752025-05-01 10:10:17 -0700118 msg := it.Next()
119 t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
120 responses = append(responses, *msg)
121 if msg.EndOfTurn {
Earl Lee2e463fb2025-04-17 11:22:22 -0700122 done = true
Earl Lee2e463fb2025-04-17 11:22:22 -0700123 }
124 }
125
126 // Verify we got at least one response
127 if len(responses) == 0 {
128 t.Fatal("No responses received from agent")
129 }
130
131 // Log the received responses for debugging
132 t.Logf("Received %d responses", len(responses))
133
134 // Find the final agent response (with EndOfTurn=true)
135 var finalResponse *AgentMessage
136 for i := range responses {
137 if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
138 finalResponse = &responses[i]
139 break
140 }
141 }
142
143 // Verify we got a final agent response
144 if finalResponse == nil {
145 t.Fatal("No final agent response received")
146 }
147
148 // Check that the response contains tools information
149 if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
150 t.Error("Expected response to mention tools")
151 }
152
153 // Count how many tool use messages we received
154 toolUseCount := 0
155 for _, msg := range responses {
156 if msg.Type == ToolUseMessageType {
157 toolUseCount++
158 }
159 }
160
161 t.Logf("Agent used %d tools in its response", toolUseCount)
162}
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000163
164func TestAgentTracksOutstandingCalls(t *testing.T) {
165 agent := &Agent{
166 outstandingLLMCalls: make(map[string]struct{}),
167 outstandingToolCalls: make(map[string]string),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700168 stateMachine: NewStateMachine(),
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000169 }
170
171 // Check initial state
172 if count := agent.OutstandingLLMCallCount(); count != 0 {
173 t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
174 }
175
176 if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
177 t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
178 }
179
180 // Add some calls
181 agent.mu.Lock()
182 agent.outstandingLLMCalls["llm1"] = struct{}{}
183 agent.outstandingToolCalls["tool1"] = "bash"
184 agent.outstandingToolCalls["tool2"] = "think"
185 agent.mu.Unlock()
186
187 // Check tracking works
188 if count := agent.OutstandingLLMCallCount(); count != 1 {
189 t.Errorf("Expected 1 outstanding LLM call, got %d", count)
190 }
191
192 tools := agent.OutstandingToolCalls()
193 if len(tools) != 2 {
194 t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
195 }
196
197 // Check removal
198 agent.mu.Lock()
199 delete(agent.outstandingLLMCalls, "llm1")
200 delete(agent.outstandingToolCalls, "tool1")
201 agent.mu.Unlock()
202
203 if count := agent.OutstandingLLMCallCount(); count != 0 {
204 t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
205 }
206
207 tools = agent.OutstandingToolCalls()
208 if len(tools) != 1 {
209 t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
210 }
211
212 if tools[0] != "think" {
213 t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
214 }
215}
Sean McCullough9f4b8082025-04-30 17:34:07 +0000216
217// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
218// a nil value for initialResp from processUserMessage.
219func TestAgentProcessTurnWithNilResponse(t *testing.T) {
220 // Create a mock conversation that will return nil and error
221 mockConvo := &MockConvoInterface{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700222 sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000223 return nil, fmt.Errorf("test error: simulating nil response")
224 },
225 }
226
227 // Create a minimal Agent instance for testing
228 agent := &Agent{
229 convo: mockConvo,
230 inbox: make(chan string, 10),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700231 subscribers: []chan *AgentMessage{},
Sean McCullough9f4b8082025-04-30 17:34:07 +0000232 outstandingLLMCalls: make(map[string]struct{}),
233 outstandingToolCalls: make(map[string]string),
234 }
235
236 // Create a test context
237 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
238 defer cancel()
239
240 // Push a test message to the inbox so that processUserMessage will try to process it
241 agent.inbox <- "Test message"
242
243 // Call processTurn - it should exit early without panic when initialResp is nil
244 agent.processTurn(ctx)
245
Philip Zeyliger9373c072025-05-01 10:27:01 -0700246 // Verify error message was added to history
247 agent.mu.Lock()
248 defer agent.mu.Unlock()
249
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000250 // There should be exactly two messages: slug + error
251 if len(agent.history) != 2 {
252 t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger9373c072025-05-01 10:27:01 -0700253 } else {
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000254 slugMsg := agent.history[0]
255 if slugMsg.Type != SlugMessageType {
256 t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000257 }
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000258 errorMsg := agent.history[1]
259 if errorMsg.Type != ErrorMessageType {
260 t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
261 }
262 if !strings.Contains(errorMsg.Content, "simulating nil response") {
263 t.Errorf("Expected error message to contain 'simulating nil response', got: %s", errorMsg.Content)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000264 }
Sean McCullough9f4b8082025-04-30 17:34:07 +0000265 }
266}
267
268// MockConvoInterface implements the ConvoInterface for testing
269type MockConvoInterface struct {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700270 sendMessageFunc func(message llm.Message) (*llm.Response, error)
271 sendUserTextMessageFunc func(s string, otherContents ...llm.Content) (*llm.Response, error)
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000272 toolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700273 toolResultCancelContentsFunc func(resp *llm.Response) ([]llm.Content, error)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000274 cancelToolUseFunc func(toolUseID string, cause error) error
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700275 cumulativeUsageFunc func() conversation.CumulativeUsage
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700276 lastUsageFunc func() llm.Usage
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700277 resetBudgetFunc func(conversation.Budget)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000278 overBudgetFunc func() error
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700279 getIDFunc func() string
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700280 subConvoWithHistoryFunc func() *conversation.Convo
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700281 debugJSONFunc func() ([]byte, error)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000282}
283
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700284func (m *MockConvoInterface) SendMessage(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000285 if m.sendMessageFunc != nil {
286 return m.sendMessageFunc(message)
287 }
288 return nil, nil
289}
290
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700291func (m *MockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000292 if m.sendUserTextMessageFunc != nil {
293 return m.sendUserTextMessageFunc(s, otherContents...)
294 }
295 return nil, nil
296}
297
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000298func (m *MockConvoInterface) ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000299 if m.toolResultContentsFunc != nil {
300 return m.toolResultContentsFunc(ctx, resp)
301 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000302 return nil, false, nil
Sean McCullough9f4b8082025-04-30 17:34:07 +0000303}
304
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700305func (m *MockConvoInterface) ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000306 if m.toolResultCancelContentsFunc != nil {
307 return m.toolResultCancelContentsFunc(resp)
308 }
309 return nil, nil
310}
311
312func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
313 if m.cancelToolUseFunc != nil {
314 return m.cancelToolUseFunc(toolUseID, cause)
315 }
316 return nil
317}
318
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700319func (m *MockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000320 if m.cumulativeUsageFunc != nil {
321 return m.cumulativeUsageFunc()
322 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700323 return conversation.CumulativeUsage{}
Sean McCullough9f4b8082025-04-30 17:34:07 +0000324}
325
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700326func (m *MockConvoInterface) LastUsage() llm.Usage {
327 if m.lastUsageFunc != nil {
328 return m.lastUsageFunc()
329 }
330 return llm.Usage{}
331}
332
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700333func (m *MockConvoInterface) ResetBudget(budget conversation.Budget) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000334 if m.resetBudgetFunc != nil {
335 m.resetBudgetFunc(budget)
336 }
337}
338
339func (m *MockConvoInterface) OverBudget() error {
340 if m.overBudgetFunc != nil {
341 return m.overBudgetFunc()
342 }
343 return nil
344}
345
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700346func (m *MockConvoInterface) GetID() string {
347 if m.getIDFunc != nil {
348 return m.getIDFunc()
349 }
350 return "mock-convo-id"
351}
352
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700353func (m *MockConvoInterface) SubConvoWithHistory() *conversation.Convo {
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700354 if m.subConvoWithHistoryFunc != nil {
355 return m.subConvoWithHistoryFunc()
356 }
357 return nil
358}
359
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700360func (m *MockConvoInterface) DebugJSON() ([]byte, error) {
361 if m.debugJSONFunc != nil {
362 return m.debugJSONFunc()
363 }
364 return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
365}
366
Sean McCullough9f4b8082025-04-30 17:34:07 +0000367// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
368// a nil value for initialResp and nil error from processUserMessage.
369// This test verifies that the implementation properly handles this edge case.
370func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
371 // Create a mock conversation that will return nil response and nil error
372 mockConvo := &MockConvoInterface{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700373 sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000374 return nil, nil // This is unusual but now handled gracefully
375 },
376 }
377
378 // Create a minimal Agent instance for testing
379 agent := &Agent{
380 convo: mockConvo,
381 inbox: make(chan string, 10),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700382 subscribers: []chan *AgentMessage{},
Sean McCullough9f4b8082025-04-30 17:34:07 +0000383 outstandingLLMCalls: make(map[string]struct{}),
384 outstandingToolCalls: make(map[string]string),
385 }
386
387 // Create a test context
388 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
389 defer cancel()
390
391 // Push a test message to the inbox so that processUserMessage will try to process it
392 agent.inbox <- "Test message"
393
394 // Call processTurn - it should handle nil initialResp with a descriptive error
395 err := agent.processTurn(ctx)
396
397 // Verify we get the expected error
398 if err == nil {
399 t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
400 } else if !strings.Contains(err.Error(), "unexpected nil response") {
401 t.Errorf("Expected error about nil response, got: %v", err)
402 } else {
403 t.Logf("As expected, processTurn returned error: %v", err)
404 }
405
Philip Zeyliger9373c072025-05-01 10:27:01 -0700406 // Verify error message was added to history
407 agent.mu.Lock()
408 defer agent.mu.Unlock()
409
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000410 // There should be exactly two messages: slug + error
411 if len(agent.history) != 2 {
412 t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger9373c072025-05-01 10:27:01 -0700413 } else {
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000414 slugMsg := agent.history[0]
415 if slugMsg.Type != SlugMessageType {
416 t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000417 }
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000418 errorMsg := agent.history[1]
419 if errorMsg.Type != ErrorMessageType {
420 t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
421 }
422 if !strings.Contains(errorMsg.Content, "unexpected nil response") {
423 t.Errorf("Expected error about nil response, got: %s", errorMsg.Content)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000424 }
Sean McCullough9f4b8082025-04-30 17:34:07 +0000425 }
426}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700427
428func TestAgentStateMachine(t *testing.T) {
429 // Create a simplified test for the state machine functionality
430 agent := &Agent{
431 stateMachine: NewStateMachine(),
432 }
433
434 // Initially the state should be Ready
435 if state := agent.CurrentState(); state != StateReady {
436 t.Errorf("Expected initial state to be StateReady, got %s", state)
437 }
438
439 // Test manual transitions to verify state tracking
440 ctx := context.Background()
441
442 // Track transitions
443 var transitions []State
444 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
445 transitions = append(transitions, to)
446 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
447 })
448
449 // Perform a valid sequence of transitions (based on the state machine rules)
450 expectedStates := []State{
451 StateWaitingForUserInput,
452 StateSendingToLLM,
453 StateProcessingLLMResponse,
454 StateToolUseRequested,
455 StateCheckingForCancellation,
456 StateRunningTool,
457 StateCheckingGitCommits,
458 StateRunningAutoformatters,
459 StateCheckingBudget,
460 StateGatheringAdditionalMessages,
461 StateSendingToolResults,
462 StateProcessingLLMResponse,
463 StateEndOfTurn,
464 }
465
466 // Manually perform each transition
467 for _, state := range expectedStates {
468 err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
469 if err != nil {
470 t.Errorf("Failed to transition to %s: %v", state, err)
471 }
472 }
473
474 // Check if we recorded the right number of transitions
475 if len(transitions) != len(expectedStates) {
476 t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
477 }
478
479 // Check each transition matched what we expected
480 for i, expected := range expectedStates {
481 if i < len(transitions) {
482 if transitions[i] != expected {
483 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
484 }
485 }
486 }
487
488 // Verify the current state is the last one we transitioned to
489 if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
490 t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
491 }
492
493 // Test force transition
494 agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
495
496 // Verify current state was updated
497 if state := agent.CurrentState(); state != StateCancelled {
498 t.Errorf("Expected forced state to be StateCancelled, got %s", state)
499 }
500}
501
502// mockConvoInterface is a mock implementation of ConvoInterface for testing
503type mockConvoInterface struct {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700504 SendMessageFunc func(message llm.Message) (*llm.Response, error)
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000505 ToolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Sean McCullough96b60dd2025-04-30 09:49:10 -0700506}
507
508func (c *mockConvoInterface) GetID() string {
509 return "mockConvoInterface-id"
510}
511
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700512func (c *mockConvoInterface) SubConvoWithHistory() *conversation.Convo {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700513 return nil
514}
515
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700516func (m *mockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
517 return conversation.CumulativeUsage{}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700518}
519
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700520func (m *mockConvoInterface) LastUsage() llm.Usage {
521 return llm.Usage{}
522}
523
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700524func (m *mockConvoInterface) ResetBudget(conversation.Budget) {}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700525
526func (m *mockConvoInterface) OverBudget() error {
527 return nil
528}
529
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700530func (m *mockConvoInterface) SendMessage(message llm.Message) (*llm.Response, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700531 if m.SendMessageFunc != nil {
532 return m.SendMessageFunc(message)
533 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700534 return &llm.Response{StopReason: llm.StopReasonEndTurn}, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700535}
536
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700537func (m *mockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error) {
538 return m.SendMessage(llm.UserStringMessage(s))
Sean McCullough96b60dd2025-04-30 09:49:10 -0700539}
540
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000541func (m *mockConvoInterface) ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700542 if m.ToolResultContentsFunc != nil {
543 return m.ToolResultContentsFunc(ctx, resp)
544 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000545 return []llm.Content{}, false, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700546}
547
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700548func (m *mockConvoInterface) ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error) {
549 return []llm.Content{llm.StringContent("Tool use cancelled")}, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700550}
551
552func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
553 return nil
554}
555
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700556func (m *mockConvoInterface) DebugJSON() ([]byte, error) {
557 return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
558}
559
Sean McCullough96b60dd2025-04-30 09:49:10 -0700560func TestAgentProcessTurnStateTransitions(t *testing.T) {
561 // Create a mock ConvoInterface for testing
562 mockConvo := &mockConvoInterface{}
563
564 // Use the testing context
565 ctx := t.Context()
566
567 // Create an agent with the state machine
568 agent := &Agent{
Philip Zeyligerf2872992025-05-22 10:35:28 -0700569 convo: mockConvo,
570 config: AgentConfig{Context: ctx},
571 inbox: make(chan string, 10),
572 ready: make(chan struct{}),
573
Sean McCullough96b60dd2025-04-30 09:49:10 -0700574 outstandingLLMCalls: make(map[string]struct{}),
575 outstandingToolCalls: make(map[string]string),
576 stateMachine: NewStateMachine(),
577 startOfTurn: time.Now(),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700578 subscribers: []chan *AgentMessage{},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700579 }
580
581 // Verify initial state
582 if state := agent.CurrentState(); state != StateReady {
583 t.Errorf("Expected initial state to be StateReady, got %s", state)
584 }
585
586 // Add a message to the inbox so we don't block in GatherMessages
587 agent.inbox <- "Test message"
588
589 // Setup the mock to simulate a model response with end of turn
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700590 mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
591 return &llm.Response{
592 StopReason: llm.StopReasonEndTurn,
593 Content: []llm.Content{
594 llm.StringContent("This is a test response"),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700595 },
596 }, nil
597 }
598
599 // Track state transitions
600 var transitions []State
601 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
602 transitions = append(transitions, to)
603 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
604 })
605
606 // Process a turn, which should trigger state transitions
607 agent.processTurn(ctx)
608
609 // The minimum expected states for a simple end-of-turn response
610 minExpectedStates := []State{
611 StateWaitingForUserInput,
612 StateSendingToLLM,
613 StateProcessingLLMResponse,
614 StateEndOfTurn,
615 }
616
617 // Verify we have at least the minimum expected states
618 if len(transitions) < len(minExpectedStates) {
619 t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
620 }
621
622 // Check that the transitions follow the expected sequence
623 for i, expected := range minExpectedStates {
624 if i < len(transitions) {
625 if transitions[i] != expected {
626 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
627 }
628 }
629 }
630
631 // Verify the final state is EndOfTurn
632 if state := agent.CurrentState(); state != StateEndOfTurn {
633 t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
634 }
635}
636
637func TestAgentProcessTurnWithToolUse(t *testing.T) {
638 // Create a mock ConvoInterface for testing
639 mockConvo := &mockConvoInterface{}
640
641 // Setup a test context
642 ctx := context.Background()
643
644 // Create an agent with the state machine
645 agent := &Agent{
Philip Zeyligerf2872992025-05-22 10:35:28 -0700646 convo: mockConvo,
647 config: AgentConfig{Context: ctx},
648 inbox: make(chan string, 10),
649 ready: make(chan struct{}),
650
Sean McCullough96b60dd2025-04-30 09:49:10 -0700651 outstandingLLMCalls: make(map[string]struct{}),
652 outstandingToolCalls: make(map[string]string),
653 stateMachine: NewStateMachine(),
654 startOfTurn: time.Now(),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700655 subscribers: []chan *AgentMessage{},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700656 }
657
658 // Add a message to the inbox so we don't block in GatherMessages
659 agent.inbox <- "Test message"
660
661 // First response requests a tool
662 firstResponseDone := false
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700663 mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700664 if !firstResponseDone {
665 firstResponseDone = true
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700666 return &llm.Response{
667 StopReason: llm.StopReasonToolUse,
668 Content: []llm.Content{
669 llm.StringContent("I'll use a tool"),
670 {Type: llm.ContentTypeToolUse, ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700671 },
672 }, nil
673 }
674 // Second response ends the turn
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700675 return &llm.Response{
676 StopReason: llm.StopReasonEndTurn,
677 Content: []llm.Content{
678 llm.StringContent("Finished using the tool"),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700679 },
680 }, nil
681 }
682
683 // Tool result content handler
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000684 mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
685 return []llm.Content{llm.StringContent("Tool executed successfully")}, false, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700686 }
687
688 // Track state transitions
689 var transitions []State
690 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
691 transitions = append(transitions, to)
692 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
693 })
694
695 // Process a turn with tool use
696 agent.processTurn(ctx)
697
698 // Define expected states for a tool use flow
699 expectedToolStates := []State{
700 StateWaitingForUserInput,
701 StateSendingToLLM,
702 StateProcessingLLMResponse,
703 StateToolUseRequested,
704 StateCheckingForCancellation,
705 StateRunningTool,
706 }
707
708 // Verify that these states are present in order
709 for i, expectedState := range expectedToolStates {
710 if i >= len(transitions) {
711 t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
712 continue
713 }
714 if transitions[i] != expectedState {
715 t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
716 }
717 }
718
719 // Also verify we eventually reached EndOfTurn
720 if !slices.Contains(transitions, StateEndOfTurn) {
721 t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
722 }
723}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700724
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700725func TestPushToOutbox(t *testing.T) {
726 // Create a new agent
727 a := &Agent{
728 outstandingLLMCalls: make(map[string]struct{}),
729 outstandingToolCalls: make(map[string]string),
730 stateMachine: NewStateMachine(),
731 subscribers: make([]chan *AgentMessage, 0),
732 }
733
734 // Create a channel to receive messages
735 messageCh := make(chan *AgentMessage, 1)
736
737 // Add the channel to the subscribers list
738 a.mu.Lock()
739 a.subscribers = append(a.subscribers, messageCh)
740 a.mu.Unlock()
741
742 // We need to set the text that would be produced by our modified contentToString function
743 resultText := "test resultnested result" // Directly set the expected output
744
745 // In a real-world scenario, this would be coming from a toolResult that contained nested content
746
747 m := AgentMessage{
748 Type: ToolUseMessageType,
749 ToolResult: resultText,
750 }
751
752 // Push the message to the outbox
753 a.pushToOutbox(context.Background(), m)
754
755 // Receive the message from the subscriber
756 received := <-messageCh
757
758 // Check that the Content field contains the concatenated text from ToolResult
759 expected := "test resultnested result"
760 if received.Content != expected {
761 t.Errorf("Expected Content to be %q, got %q", expected, received.Content)
762 }
763}
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000764
765// TestCleanSlugName tests the slug cleaning function
766func TestCleanSlugName(t *testing.T) {
767 tests := []struct {
768 name string
769 input string
770 want string
771 }{
772 {"simple lowercase", "fix-bug", "fix-bug"},
773 {"uppercase to lowercase", "FIX-BUG", "fix-bug"},
774 {"spaces to hyphens", "fix login bug", "fix-login-bug"},
775 {"mixed case and spaces", "Fix Login Bug", "fix-login-bug"},
776 {"special characters removed", "fix_bug@home!", "fixbughome"},
777 {"multiple hyphens preserved", "fix--bug---here", "fix--bug---here"},
778 {"leading/trailing hyphens preserved", "-fix-bug-", "-fix-bug-"},
779 {"numbers preserved", "fix-bug-v2", "fix-bug-v2"},
780 {"empty string", "", ""},
781 {"only special chars", "@#$%", ""},
782 }
783
784 for _, tt := range tests {
785 t.Run(tt.name, func(t *testing.T) {
786 got := cleanSlugName(tt.input)
787 if got != tt.want {
788 t.Errorf("cleanSlugName(%q) = %q, want %q", tt.input, got, tt.want)
789 }
790 })
791 }
792}
793
794// TestAutoGenerateSlugInputValidation tests input validation for auto slug generation
795func TestAutoGenerateSlugInputValidation(t *testing.T) {
796 // Test soleText with empty input
797 emptyContents := []llm.Content{}
798 _, err := soleText(emptyContents)
799 if err == nil {
800 t.Errorf("Expected error for empty contents, got nil")
801 }
802
803 // Test with non-text content only
804 nonTextContents := []llm.Content{
805 {Type: llm.ContentTypeToolUse, ToolName: "bash"},
806 }
807 _, err = soleText(nonTextContents)
808 if err == nil {
809 t.Errorf("Expected error for non-text contents, got nil")
810 }
811
812 // Test slug formatting
813 testInputs := []string{
814 "Fix the login bug",
815 "Add user authentication system",
816 "Refactor API endpoints",
817 "Update documentation",
818 }
819
820 for _, input := range testInputs {
821 slug := cleanSlugName(strings.ToLower(strings.ReplaceAll(input, " ", "-")))
822 if slug == "" {
823 t.Errorf("cleanSlugName produced empty result for input %q", input)
824 }
825 if !strings.Contains(slug, "-") {
826 // We expect most multi-word inputs to contain hyphens after processing
827 t.Logf("Input %q produced slug %q (no hyphen found, might be single word)", input, slug)
828 }
829 }
830}
831
832// TestSoleText tests the soleText helper function
833func TestSoleText(t *testing.T) {
834 tests := []struct {
835 name string
836 contents []llm.Content
837 wantText string
838 wantErr bool
839 }{
840 {
841 name: "single text content",
842 contents: []llm.Content{
843 {Type: llm.ContentTypeText, Text: " Hello world "},
844 },
845 wantText: "Hello world",
846 wantErr: false,
847 },
848 {
849 name: "empty slice",
850 contents: []llm.Content{},
851 wantText: "",
852 wantErr: true,
853 },
854 {
855 name: "multiple contents",
856 contents: []llm.Content{
857 {Type: llm.ContentTypeText, Text: "First"},
858 {Type: llm.ContentTypeText, Text: "Second"},
859 },
860 wantText: "",
861 wantErr: true,
862 },
863 {
864 name: "non-text content",
865 contents: []llm.Content{
866 {Type: llm.ContentTypeToolUse, ToolName: "bash"},
867 },
868 wantText: "",
869 wantErr: true,
870 },
871 {
872 name: "empty text content",
873 contents: []llm.Content{
874 {Type: llm.ContentTypeText, Text: ""},
875 },
876 wantText: "",
877 wantErr: true,
878 },
879 }
880
881 for _, tt := range tests {
882 t.Run(tt.name, func(t *testing.T) {
883 gotText, err := soleText(tt.contents)
884 if (err != nil) != tt.wantErr {
885 t.Errorf("soleText() error = %v, wantErr %v", err, tt.wantErr)
886 return
887 }
888 if gotText != tt.wantText {
889 t.Errorf("soleText() gotText = %v, want %v", gotText, tt.wantText)
890 }
891 })
892 }
893}
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000894
895// TestSystemPromptIncludesDateTime tests that the system prompt includes current date/time
896func TestSystemPromptIncludesDateTime(t *testing.T) {
897 ctx := context.Background()
898
899 // Create a minimal agent config for testing
900 config := AgentConfig{
901 Context: ctx,
902 ClientGOOS: "linux",
903 ClientGOARCH: "amd64",
904 }
905
906 // Create agent
907 agent := NewAgent(config)
908
909 // Use fixed time for deterministic tests
910 fixedTime := time.Date(2025, 7, 25, 19, 37, 57, 0, time.UTC)
911 agent.now = func() time.Time { return fixedTime }
912
913 // Set minimal required fields for rendering
914 agent.workingDir = "/tmp"
915 agent.repoRoot = "/tmp"
916
917 // Mock SketchGitBase to return a valid commit hash
918 // We'll override this by setting a method that returns a fixed value
919 // Since we can't easily mock the git calls, we'll work around it
920
921 // Render the system prompt
922 systemPrompt := agent.renderSystemPrompt()
923
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000924 // Check that the system prompt contains a current_date section
925 if !strings.Contains(systemPrompt, "<current_date>") {
926 t.Error("System prompt should contain <current_date> section")
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000927 }
928
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000929 // Check that it contains what looks like a date
930 // The format is "2006-01-02" (time.DateOnly)
931 if !strings.Contains(systemPrompt, "-") {
932 t.Error("System prompt should contain a formatted date")
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000933 }
934
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000935 // Verify the expected fixed date (2025-07-25)
936 expectedDate := "2025-07-25"
937 if !strings.Contains(systemPrompt, expectedDate) {
938 t.Errorf("System prompt should contain expected fixed date %s", expectedDate)
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000939 }
940
941 // Print part of the system prompt for manual verification in test output
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000942 // Find the current_date section
943 start := strings.Index(systemPrompt, "<current_date>")
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000944 if start != -1 {
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000945 end := strings.Index(systemPrompt[start:], "</current_date>") + start
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000946 if end > start {
Josh Bleecher Snyder9224eb02025-07-26 04:45:05 +0000947 dateSection := systemPrompt[start : end+len("</current_date>")]
948 t.Logf("Date section in system prompt: %s", dateSection)
Josh Bleecher Snyder8a0de522025-07-24 19:29:07 +0000949 }
950 }
951}