blob: 38422a3a482df341de8f42fc5098ba3396f10450 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
Josh Bleecher Snyder4d5e9972025-05-01 15:56:37 -07004 "cmp"
Earl Lee2e463fb2025-04-17 11:22:22 -07005 "context"
Sean McCullough9f4b8082025-04-30 17:34:07 +00006 "fmt"
Earl Lee2e463fb2025-04-17 11:22:22 -07007 "net/http"
8 "os"
Sean McCullough96b60dd2025-04-30 09:49:10 -07009 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -070010 "strings"
11 "testing"
12 "time"
13
Earl Lee2e463fb2025-04-17 11:22:22 -070014 "sketch.dev/httprr"
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070015 "sketch.dev/llm"
16 "sketch.dev/llm/ant"
17 "sketch.dev/llm/conversation"
Earl Lee2e463fb2025-04-17 11:22:22 -070018)
19
20// TestAgentLoop tests that the Agent loop functionality works correctly.
21// It uses the httprr package to record HTTP interactions for replay in tests.
22// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .*agent_loop.*"
23// as necessary.
24func TestAgentLoop(t *testing.T) {
25 ctx := context.Background()
26
27 // Setup httprr recorder
28 rrPath := "testdata/agent_loop.httprr"
29 rr, err := httprr.Open(rrPath, http.DefaultTransport)
30 if err != nil && !os.IsNotExist(err) {
31 t.Fatal(err)
32 }
33
34 if rr.Recording() {
35 // Skip the test if API key is not available
36 if os.Getenv("ANTHROPIC_API_KEY") == "" {
37 t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
38 }
39 }
40
41 // Create HTTP client
42 var client *http.Client
43 if rr != nil {
44 // Scrub API keys from requests for security
45 rr.ScrubReq(func(req *http.Request) error {
46 req.Header.Del("x-api-key")
47 req.Header.Del("anthropic-api-key")
48 return nil
49 })
50 client = rr.Client()
51 } else {
52 client = &http.Client{Transport: http.DefaultTransport}
53 }
54
55 // Create a new agent with the httprr client
56 origWD, err := os.Getwd()
57 if err != nil {
58 t.Fatal(err)
59 }
60 if err := os.Chdir("/"); err != nil {
61 t.Fatal(err)
62 }
Philip Zeyligere6c294d2025-06-04 16:55:21 +000063 budget := conversation.Budget{MaxDollars: 10.0}
Earl Lee2e463fb2025-04-17 11:22:22 -070064 wd, err := os.Getwd()
65 if err != nil {
66 t.Fatal(err)
67 }
68
David Crawshaw3659d872025-05-05 17:52:23 -070069 apiKey := cmp.Or(os.Getenv("OUTER_SKETCH_MODEL_API_KEY"), os.Getenv("ANTHROPIC_API_KEY"))
Earl Lee2e463fb2025-04-17 11:22:22 -070070 cfg := AgentConfig{
Philip Zeyligerbc8c8dc2025-05-21 13:19:13 -070071 Context: ctx,
72 WorkingDir: wd,
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -070073 Service: &ant.Service{
74 APIKey: apiKey,
75 HTTPC: client,
76 },
Earl Lee2e463fb2025-04-17 11:22:22 -070077 Budget: budget,
78 GitUsername: "Test Agent",
79 GitEmail: "totallyhuman@sketch.dev",
80 SessionID: "test-session-id",
81 ClientGOOS: "linux",
82 ClientGOARCH: "amd64",
83 }
84 agent := NewAgent(cfg)
85 if err := os.Chdir(origWD); err != nil {
86 t.Fatal(err)
87 }
Philip Zeyligerbc8c8dc2025-05-21 13:19:13 -070088 err = agent.Init(AgentInit{NoGit: true})
Earl Lee2e463fb2025-04-17 11:22:22 -070089 if err != nil {
90 t.Fatal(err)
91 }
92
93 // Setup a test message that will trigger a simple, predictable response
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +000094 userMessage := "What tools are available to you? Please just list them briefly."
95
96 // Set a slug so that the agent doesn't have to.
97 agent.SetSlug("list-available-tools")
Earl Lee2e463fb2025-04-17 11:22:22 -070098
99 // Send the message to the agent
100 agent.UserMessage(ctx, userMessage)
101
102 // Process a single loop iteration to avoid long-running tests
Sean McCullough885a16a2025-04-30 02:49:25 +0000103 agent.processTurn(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -0700104
105 // Collect responses with a timeout
106 var responses []AgentMessage
Philip Zeyliger9373c072025-05-01 10:27:01 -0700107 ctx2, cancel := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
108 defer cancel()
Earl Lee2e463fb2025-04-17 11:22:22 -0700109 done := false
Philip Zeyligerb7c58752025-05-01 10:10:17 -0700110 it := agent.NewIterator(ctx2, 0)
Earl Lee2e463fb2025-04-17 11:22:22 -0700111
112 for !done {
Philip Zeyligerb7c58752025-05-01 10:10:17 -0700113 msg := it.Next()
114 t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
115 responses = append(responses, *msg)
116 if msg.EndOfTurn {
Earl Lee2e463fb2025-04-17 11:22:22 -0700117 done = true
Earl Lee2e463fb2025-04-17 11:22:22 -0700118 }
119 }
120
121 // Verify we got at least one response
122 if len(responses) == 0 {
123 t.Fatal("No responses received from agent")
124 }
125
126 // Log the received responses for debugging
127 t.Logf("Received %d responses", len(responses))
128
129 // Find the final agent response (with EndOfTurn=true)
130 var finalResponse *AgentMessage
131 for i := range responses {
132 if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
133 finalResponse = &responses[i]
134 break
135 }
136 }
137
138 // Verify we got a final agent response
139 if finalResponse == nil {
140 t.Fatal("No final agent response received")
141 }
142
143 // Check that the response contains tools information
144 if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
145 t.Error("Expected response to mention tools")
146 }
147
148 // Count how many tool use messages we received
149 toolUseCount := 0
150 for _, msg := range responses {
151 if msg.Type == ToolUseMessageType {
152 toolUseCount++
153 }
154 }
155
156 t.Logf("Agent used %d tools in its response", toolUseCount)
157}
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000158
159func TestAgentTracksOutstandingCalls(t *testing.T) {
160 agent := &Agent{
161 outstandingLLMCalls: make(map[string]struct{}),
162 outstandingToolCalls: make(map[string]string),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700163 stateMachine: NewStateMachine(),
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000164 }
165
166 // Check initial state
167 if count := agent.OutstandingLLMCallCount(); count != 0 {
168 t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
169 }
170
171 if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
172 t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
173 }
174
175 // Add some calls
176 agent.mu.Lock()
177 agent.outstandingLLMCalls["llm1"] = struct{}{}
178 agent.outstandingToolCalls["tool1"] = "bash"
179 agent.outstandingToolCalls["tool2"] = "think"
180 agent.mu.Unlock()
181
182 // Check tracking works
183 if count := agent.OutstandingLLMCallCount(); count != 1 {
184 t.Errorf("Expected 1 outstanding LLM call, got %d", count)
185 }
186
187 tools := agent.OutstandingToolCalls()
188 if len(tools) != 2 {
189 t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
190 }
191
192 // Check removal
193 agent.mu.Lock()
194 delete(agent.outstandingLLMCalls, "llm1")
195 delete(agent.outstandingToolCalls, "tool1")
196 agent.mu.Unlock()
197
198 if count := agent.OutstandingLLMCallCount(); count != 0 {
199 t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
200 }
201
202 tools = agent.OutstandingToolCalls()
203 if len(tools) != 1 {
204 t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
205 }
206
207 if tools[0] != "think" {
208 t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
209 }
210}
Sean McCullough9f4b8082025-04-30 17:34:07 +0000211
212// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
213// a nil value for initialResp from processUserMessage.
214func TestAgentProcessTurnWithNilResponse(t *testing.T) {
215 // Create a mock conversation that will return nil and error
216 mockConvo := &MockConvoInterface{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700217 sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000218 return nil, fmt.Errorf("test error: simulating nil response")
219 },
220 }
221
222 // Create a minimal Agent instance for testing
223 agent := &Agent{
224 convo: mockConvo,
225 inbox: make(chan string, 10),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700226 subscribers: []chan *AgentMessage{},
Sean McCullough9f4b8082025-04-30 17:34:07 +0000227 outstandingLLMCalls: make(map[string]struct{}),
228 outstandingToolCalls: make(map[string]string),
229 }
230
231 // Create a test context
232 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
233 defer cancel()
234
235 // Push a test message to the inbox so that processUserMessage will try to process it
236 agent.inbox <- "Test message"
237
238 // Call processTurn - it should exit early without panic when initialResp is nil
239 agent.processTurn(ctx)
240
Philip Zeyliger9373c072025-05-01 10:27:01 -0700241 // Verify error message was added to history
242 agent.mu.Lock()
243 defer agent.mu.Unlock()
244
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000245 // There should be exactly two messages: slug + error
246 if len(agent.history) != 2 {
247 t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger9373c072025-05-01 10:27:01 -0700248 } else {
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000249 slugMsg := agent.history[0]
250 if slugMsg.Type != SlugMessageType {
251 t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000252 }
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000253 errorMsg := agent.history[1]
254 if errorMsg.Type != ErrorMessageType {
255 t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
256 }
257 if !strings.Contains(errorMsg.Content, "simulating nil response") {
258 t.Errorf("Expected error message to contain 'simulating nil response', got: %s", errorMsg.Content)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000259 }
Sean McCullough9f4b8082025-04-30 17:34:07 +0000260 }
261}
262
263// MockConvoInterface implements the ConvoInterface for testing
264type MockConvoInterface struct {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700265 sendMessageFunc func(message llm.Message) (*llm.Response, error)
266 sendUserTextMessageFunc func(s string, otherContents ...llm.Content) (*llm.Response, error)
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000267 toolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700268 toolResultCancelContentsFunc func(resp *llm.Response) ([]llm.Content, error)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000269 cancelToolUseFunc func(toolUseID string, cause error) error
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700270 cumulativeUsageFunc func() conversation.CumulativeUsage
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700271 lastUsageFunc func() llm.Usage
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700272 resetBudgetFunc func(conversation.Budget)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000273 overBudgetFunc func() error
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700274 getIDFunc func() string
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700275 subConvoWithHistoryFunc func() *conversation.Convo
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700276 debugJSONFunc func() ([]byte, error)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000277}
278
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700279func (m *MockConvoInterface) SendMessage(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000280 if m.sendMessageFunc != nil {
281 return m.sendMessageFunc(message)
282 }
283 return nil, nil
284}
285
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700286func (m *MockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000287 if m.sendUserTextMessageFunc != nil {
288 return m.sendUserTextMessageFunc(s, otherContents...)
289 }
290 return nil, nil
291}
292
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000293func (m *MockConvoInterface) ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000294 if m.toolResultContentsFunc != nil {
295 return m.toolResultContentsFunc(ctx, resp)
296 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000297 return nil, false, nil
Sean McCullough9f4b8082025-04-30 17:34:07 +0000298}
299
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700300func (m *MockConvoInterface) ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000301 if m.toolResultCancelContentsFunc != nil {
302 return m.toolResultCancelContentsFunc(resp)
303 }
304 return nil, nil
305}
306
307func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
308 if m.cancelToolUseFunc != nil {
309 return m.cancelToolUseFunc(toolUseID, cause)
310 }
311 return nil
312}
313
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700314func (m *MockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000315 if m.cumulativeUsageFunc != nil {
316 return m.cumulativeUsageFunc()
317 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700318 return conversation.CumulativeUsage{}
Sean McCullough9f4b8082025-04-30 17:34:07 +0000319}
320
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700321func (m *MockConvoInterface) LastUsage() llm.Usage {
322 if m.lastUsageFunc != nil {
323 return m.lastUsageFunc()
324 }
325 return llm.Usage{}
326}
327
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700328func (m *MockConvoInterface) ResetBudget(budget conversation.Budget) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000329 if m.resetBudgetFunc != nil {
330 m.resetBudgetFunc(budget)
331 }
332}
333
334func (m *MockConvoInterface) OverBudget() error {
335 if m.overBudgetFunc != nil {
336 return m.overBudgetFunc()
337 }
338 return nil
339}
340
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700341func (m *MockConvoInterface) GetID() string {
342 if m.getIDFunc != nil {
343 return m.getIDFunc()
344 }
345 return "mock-convo-id"
346}
347
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700348func (m *MockConvoInterface) SubConvoWithHistory() *conversation.Convo {
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700349 if m.subConvoWithHistoryFunc != nil {
350 return m.subConvoWithHistoryFunc()
351 }
352 return nil
353}
354
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700355func (m *MockConvoInterface) DebugJSON() ([]byte, error) {
356 if m.debugJSONFunc != nil {
357 return m.debugJSONFunc()
358 }
359 return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
360}
361
Sean McCullough9f4b8082025-04-30 17:34:07 +0000362// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
363// a nil value for initialResp and nil error from processUserMessage.
364// This test verifies that the implementation properly handles this edge case.
365func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
366 // Create a mock conversation that will return nil response and nil error
367 mockConvo := &MockConvoInterface{
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700368 sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough9f4b8082025-04-30 17:34:07 +0000369 return nil, nil // This is unusual but now handled gracefully
370 },
371 }
372
373 // Create a minimal Agent instance for testing
374 agent := &Agent{
375 convo: mockConvo,
376 inbox: make(chan string, 10),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700377 subscribers: []chan *AgentMessage{},
Sean McCullough9f4b8082025-04-30 17:34:07 +0000378 outstandingLLMCalls: make(map[string]struct{}),
379 outstandingToolCalls: make(map[string]string),
380 }
381
382 // Create a test context
383 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
384 defer cancel()
385
386 // Push a test message to the inbox so that processUserMessage will try to process it
387 agent.inbox <- "Test message"
388
389 // Call processTurn - it should handle nil initialResp with a descriptive error
390 err := agent.processTurn(ctx)
391
392 // Verify we get the expected error
393 if err == nil {
394 t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
395 } else if !strings.Contains(err.Error(), "unexpected nil response") {
396 t.Errorf("Expected error about nil response, got: %v", err)
397 } else {
398 t.Logf("As expected, processTurn returned error: %v", err)
399 }
400
Philip Zeyliger9373c072025-05-01 10:27:01 -0700401 // Verify error message was added to history
402 agent.mu.Lock()
403 defer agent.mu.Unlock()
404
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000405 // There should be exactly two messages: slug + error
406 if len(agent.history) != 2 {
407 t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger9373c072025-05-01 10:27:01 -0700408 } else {
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000409 slugMsg := agent.history[0]
410 if slugMsg.Type != SlugMessageType {
411 t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000412 }
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000413 errorMsg := agent.history[1]
414 if errorMsg.Type != ErrorMessageType {
415 t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
416 }
417 if !strings.Contains(errorMsg.Content, "unexpected nil response") {
418 t.Errorf("Expected error about nil response, got: %s", errorMsg.Content)
Sean McCullough9f4b8082025-04-30 17:34:07 +0000419 }
Sean McCullough9f4b8082025-04-30 17:34:07 +0000420 }
421}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700422
423func TestAgentStateMachine(t *testing.T) {
424 // Create a simplified test for the state machine functionality
425 agent := &Agent{
426 stateMachine: NewStateMachine(),
427 }
428
429 // Initially the state should be Ready
430 if state := agent.CurrentState(); state != StateReady {
431 t.Errorf("Expected initial state to be StateReady, got %s", state)
432 }
433
434 // Test manual transitions to verify state tracking
435 ctx := context.Background()
436
437 // Track transitions
438 var transitions []State
439 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
440 transitions = append(transitions, to)
441 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
442 })
443
444 // Perform a valid sequence of transitions (based on the state machine rules)
445 expectedStates := []State{
446 StateWaitingForUserInput,
447 StateSendingToLLM,
448 StateProcessingLLMResponse,
449 StateToolUseRequested,
450 StateCheckingForCancellation,
451 StateRunningTool,
452 StateCheckingGitCommits,
453 StateRunningAutoformatters,
454 StateCheckingBudget,
455 StateGatheringAdditionalMessages,
456 StateSendingToolResults,
457 StateProcessingLLMResponse,
458 StateEndOfTurn,
459 }
460
461 // Manually perform each transition
462 for _, state := range expectedStates {
463 err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
464 if err != nil {
465 t.Errorf("Failed to transition to %s: %v", state, err)
466 }
467 }
468
469 // Check if we recorded the right number of transitions
470 if len(transitions) != len(expectedStates) {
471 t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
472 }
473
474 // Check each transition matched what we expected
475 for i, expected := range expectedStates {
476 if i < len(transitions) {
477 if transitions[i] != expected {
478 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
479 }
480 }
481 }
482
483 // Verify the current state is the last one we transitioned to
484 if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
485 t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
486 }
487
488 // Test force transition
489 agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
490
491 // Verify current state was updated
492 if state := agent.CurrentState(); state != StateCancelled {
493 t.Errorf("Expected forced state to be StateCancelled, got %s", state)
494 }
495}
496
497// mockConvoInterface is a mock implementation of ConvoInterface for testing
498type mockConvoInterface struct {
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700499 SendMessageFunc func(message llm.Message) (*llm.Response, error)
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000500 ToolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Sean McCullough96b60dd2025-04-30 09:49:10 -0700501}
502
503func (c *mockConvoInterface) GetID() string {
504 return "mockConvoInterface-id"
505}
506
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700507func (c *mockConvoInterface) SubConvoWithHistory() *conversation.Convo {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700508 return nil
509}
510
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700511func (m *mockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
512 return conversation.CumulativeUsage{}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700513}
514
Philip Zeyligerb8a8f352025-06-02 07:39:37 -0700515func (m *mockConvoInterface) LastUsage() llm.Usage {
516 return llm.Usage{}
517}
518
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700519func (m *mockConvoInterface) ResetBudget(conversation.Budget) {}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700520
521func (m *mockConvoInterface) OverBudget() error {
522 return nil
523}
524
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700525func (m *mockConvoInterface) SendMessage(message llm.Message) (*llm.Response, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700526 if m.SendMessageFunc != nil {
527 return m.SendMessageFunc(message)
528 }
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700529 return &llm.Response{StopReason: llm.StopReasonEndTurn}, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700530}
531
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700532func (m *mockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (*llm.Response, error) {
533 return m.SendMessage(llm.UserStringMessage(s))
Sean McCullough96b60dd2025-04-30 09:49:10 -0700534}
535
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000536func (m *mockConvoInterface) ToolResultContents(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700537 if m.ToolResultContentsFunc != nil {
538 return m.ToolResultContentsFunc(ctx, resp)
539 }
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000540 return []llm.Content{}, false, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700541}
542
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700543func (m *mockConvoInterface) ToolResultCancelContents(resp *llm.Response) ([]llm.Content, error) {
544 return []llm.Content{llm.StringContent("Tool use cancelled")}, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700545}
546
547func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
548 return nil
549}
550
Philip Zeyliger43a0bfc2025-07-14 14:54:27 -0700551func (m *mockConvoInterface) DebugJSON() ([]byte, error) {
552 return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
553}
554
Sean McCullough96b60dd2025-04-30 09:49:10 -0700555func TestAgentProcessTurnStateTransitions(t *testing.T) {
556 // Create a mock ConvoInterface for testing
557 mockConvo := &mockConvoInterface{}
558
559 // Use the testing context
560 ctx := t.Context()
561
562 // Create an agent with the state machine
563 agent := &Agent{
Philip Zeyligerf2872992025-05-22 10:35:28 -0700564 convo: mockConvo,
565 config: AgentConfig{Context: ctx},
566 inbox: make(chan string, 10),
567 ready: make(chan struct{}),
568
Sean McCullough96b60dd2025-04-30 09:49:10 -0700569 outstandingLLMCalls: make(map[string]struct{}),
570 outstandingToolCalls: make(map[string]string),
571 stateMachine: NewStateMachine(),
572 startOfTurn: time.Now(),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700573 subscribers: []chan *AgentMessage{},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700574 }
575
576 // Verify initial state
577 if state := agent.CurrentState(); state != StateReady {
578 t.Errorf("Expected initial state to be StateReady, got %s", state)
579 }
580
581 // Add a message to the inbox so we don't block in GatherMessages
582 agent.inbox <- "Test message"
583
584 // Setup the mock to simulate a model response with end of turn
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700585 mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
586 return &llm.Response{
587 StopReason: llm.StopReasonEndTurn,
588 Content: []llm.Content{
589 llm.StringContent("This is a test response"),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700590 },
591 }, nil
592 }
593
594 // Track state transitions
595 var transitions []State
596 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
597 transitions = append(transitions, to)
598 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
599 })
600
601 // Process a turn, which should trigger state transitions
602 agent.processTurn(ctx)
603
604 // The minimum expected states for a simple end-of-turn response
605 minExpectedStates := []State{
606 StateWaitingForUserInput,
607 StateSendingToLLM,
608 StateProcessingLLMResponse,
609 StateEndOfTurn,
610 }
611
612 // Verify we have at least the minimum expected states
613 if len(transitions) < len(minExpectedStates) {
614 t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
615 }
616
617 // Check that the transitions follow the expected sequence
618 for i, expected := range minExpectedStates {
619 if i < len(transitions) {
620 if transitions[i] != expected {
621 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
622 }
623 }
624 }
625
626 // Verify the final state is EndOfTurn
627 if state := agent.CurrentState(); state != StateEndOfTurn {
628 t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
629 }
630}
631
632func TestAgentProcessTurnWithToolUse(t *testing.T) {
633 // Create a mock ConvoInterface for testing
634 mockConvo := &mockConvoInterface{}
635
636 // Setup a test context
637 ctx := context.Background()
638
639 // Create an agent with the state machine
640 agent := &Agent{
Philip Zeyligerf2872992025-05-22 10:35:28 -0700641 convo: mockConvo,
642 config: AgentConfig{Context: ctx},
643 inbox: make(chan string, 10),
644 ready: make(chan struct{}),
645
Sean McCullough96b60dd2025-04-30 09:49:10 -0700646 outstandingLLMCalls: make(map[string]struct{}),
647 outstandingToolCalls: make(map[string]string),
648 stateMachine: NewStateMachine(),
649 startOfTurn: time.Now(),
Philip Zeyliger9373c072025-05-01 10:27:01 -0700650 subscribers: []chan *AgentMessage{},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700651 }
652
653 // Add a message to the inbox so we don't block in GatherMessages
654 agent.inbox <- "Test message"
655
656 // First response requests a tool
657 firstResponseDone := false
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700658 mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
Sean McCullough96b60dd2025-04-30 09:49:10 -0700659 if !firstResponseDone {
660 firstResponseDone = true
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700661 return &llm.Response{
662 StopReason: llm.StopReasonToolUse,
663 Content: []llm.Content{
664 llm.StringContent("I'll use a tool"),
665 {Type: llm.ContentTypeToolUse, ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
Sean McCullough96b60dd2025-04-30 09:49:10 -0700666 },
667 }, nil
668 }
669 // Second response ends the turn
Josh Bleecher Snyder4f84ab72025-04-22 16:40:54 -0700670 return &llm.Response{
671 StopReason: llm.StopReasonEndTurn,
672 Content: []llm.Content{
673 llm.StringContent("Finished using the tool"),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700674 },
675 }, nil
676 }
677
678 // Tool result content handler
Josh Bleecher Snyder64f2aa82025-05-14 18:31:05 +0000679 mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
680 return []llm.Content{llm.StringContent("Tool executed successfully")}, false, nil
Sean McCullough96b60dd2025-04-30 09:49:10 -0700681 }
682
683 // Track state transitions
684 var transitions []State
685 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
686 transitions = append(transitions, to)
687 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
688 })
689
690 // Process a turn with tool use
691 agent.processTurn(ctx)
692
693 // Define expected states for a tool use flow
694 expectedToolStates := []State{
695 StateWaitingForUserInput,
696 StateSendingToLLM,
697 StateProcessingLLMResponse,
698 StateToolUseRequested,
699 StateCheckingForCancellation,
700 StateRunningTool,
701 }
702
703 // Verify that these states are present in order
704 for i, expectedState := range expectedToolStates {
705 if i >= len(transitions) {
706 t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
707 continue
708 }
709 if transitions[i] != expectedState {
710 t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
711 }
712 }
713
714 // Also verify we eventually reached EndOfTurn
715 if !slices.Contains(transitions, StateEndOfTurn) {
716 t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
717 }
718}
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700719
Philip Zeyliger72252cb2025-05-10 17:00:08 -0700720func TestPushToOutbox(t *testing.T) {
721 // Create a new agent
722 a := &Agent{
723 outstandingLLMCalls: make(map[string]struct{}),
724 outstandingToolCalls: make(map[string]string),
725 stateMachine: NewStateMachine(),
726 subscribers: make([]chan *AgentMessage, 0),
727 }
728
729 // Create a channel to receive messages
730 messageCh := make(chan *AgentMessage, 1)
731
732 // Add the channel to the subscribers list
733 a.mu.Lock()
734 a.subscribers = append(a.subscribers, messageCh)
735 a.mu.Unlock()
736
737 // We need to set the text that would be produced by our modified contentToString function
738 resultText := "test resultnested result" // Directly set the expected output
739
740 // In a real-world scenario, this would be coming from a toolResult that contained nested content
741
742 m := AgentMessage{
743 Type: ToolUseMessageType,
744 ToolResult: resultText,
745 }
746
747 // Push the message to the outbox
748 a.pushToOutbox(context.Background(), m)
749
750 // Receive the message from the subscriber
751 received := <-messageCh
752
753 // Check that the Content field contains the concatenated text from ToolResult
754 expected := "test resultnested result"
755 if received.Content != expected {
756 t.Errorf("Expected Content to be %q, got %q", expected, received.Content)
757 }
758}
Josh Bleecher Snyder3b44cc32025-07-22 02:28:14 +0000759
760// TestCleanSlugName tests the slug cleaning function
761func TestCleanSlugName(t *testing.T) {
762 tests := []struct {
763 name string
764 input string
765 want string
766 }{
767 {"simple lowercase", "fix-bug", "fix-bug"},
768 {"uppercase to lowercase", "FIX-BUG", "fix-bug"},
769 {"spaces to hyphens", "fix login bug", "fix-login-bug"},
770 {"mixed case and spaces", "Fix Login Bug", "fix-login-bug"},
771 {"special characters removed", "fix_bug@home!", "fixbughome"},
772 {"multiple hyphens preserved", "fix--bug---here", "fix--bug---here"},
773 {"leading/trailing hyphens preserved", "-fix-bug-", "-fix-bug-"},
774 {"numbers preserved", "fix-bug-v2", "fix-bug-v2"},
775 {"empty string", "", ""},
776 {"only special chars", "@#$%", ""},
777 }
778
779 for _, tt := range tests {
780 t.Run(tt.name, func(t *testing.T) {
781 got := cleanSlugName(tt.input)
782 if got != tt.want {
783 t.Errorf("cleanSlugName(%q) = %q, want %q", tt.input, got, tt.want)
784 }
785 })
786 }
787}
788
789// TestAutoGenerateSlugInputValidation tests input validation for auto slug generation
790func TestAutoGenerateSlugInputValidation(t *testing.T) {
791 // Test soleText with empty input
792 emptyContents := []llm.Content{}
793 _, err := soleText(emptyContents)
794 if err == nil {
795 t.Errorf("Expected error for empty contents, got nil")
796 }
797
798 // Test with non-text content only
799 nonTextContents := []llm.Content{
800 {Type: llm.ContentTypeToolUse, ToolName: "bash"},
801 }
802 _, err = soleText(nonTextContents)
803 if err == nil {
804 t.Errorf("Expected error for non-text contents, got nil")
805 }
806
807 // Test slug formatting
808 testInputs := []string{
809 "Fix the login bug",
810 "Add user authentication system",
811 "Refactor API endpoints",
812 "Update documentation",
813 }
814
815 for _, input := range testInputs {
816 slug := cleanSlugName(strings.ToLower(strings.ReplaceAll(input, " ", "-")))
817 if slug == "" {
818 t.Errorf("cleanSlugName produced empty result for input %q", input)
819 }
820 if !strings.Contains(slug, "-") {
821 // We expect most multi-word inputs to contain hyphens after processing
822 t.Logf("Input %q produced slug %q (no hyphen found, might be single word)", input, slug)
823 }
824 }
825}
826
827// TestSoleText tests the soleText helper function
828func TestSoleText(t *testing.T) {
829 tests := []struct {
830 name string
831 contents []llm.Content
832 wantText string
833 wantErr bool
834 }{
835 {
836 name: "single text content",
837 contents: []llm.Content{
838 {Type: llm.ContentTypeText, Text: " Hello world "},
839 },
840 wantText: "Hello world",
841 wantErr: false,
842 },
843 {
844 name: "empty slice",
845 contents: []llm.Content{},
846 wantText: "",
847 wantErr: true,
848 },
849 {
850 name: "multiple contents",
851 contents: []llm.Content{
852 {Type: llm.ContentTypeText, Text: "First"},
853 {Type: llm.ContentTypeText, Text: "Second"},
854 },
855 wantText: "",
856 wantErr: true,
857 },
858 {
859 name: "non-text content",
860 contents: []llm.Content{
861 {Type: llm.ContentTypeToolUse, ToolName: "bash"},
862 },
863 wantText: "",
864 wantErr: true,
865 },
866 {
867 name: "empty text content",
868 contents: []llm.Content{
869 {Type: llm.ContentTypeText, Text: ""},
870 },
871 wantText: "",
872 wantErr: true,
873 },
874 }
875
876 for _, tt := range tests {
877 t.Run(tt.name, func(t *testing.T) {
878 gotText, err := soleText(tt.contents)
879 if (err != nil) != tt.wantErr {
880 t.Errorf("soleText() error = %v, wantErr %v", err, tt.wantErr)
881 return
882 }
883 if gotText != tt.wantText {
884 t.Errorf("soleText() gotText = %v, want %v", gotText, tt.wantText)
885 }
886 })
887 }
888}