blob: 9663e268d95c67a18ec38ccce131ca718f4aa6a8 [file] [log] [blame]
Earl Lee2e463fb2025-04-17 11:22:22 -07001package loop
2
3import (
4 "context"
Sean McCullough9f4b8082025-04-30 17:34:07 +00005 "fmt"
Earl Lee2e463fb2025-04-17 11:22:22 -07006 "net/http"
7 "os"
Sean McCullough96b60dd2025-04-30 09:49:10 -07008 "slices"
Earl Lee2e463fb2025-04-17 11:22:22 -07009 "strings"
10 "testing"
11 "time"
12
13 "sketch.dev/ant"
14 "sketch.dev/httprr"
15)
16
17// TestAgentLoop tests that the Agent loop functionality works correctly.
18// It uses the httprr package to record HTTP interactions for replay in tests.
19// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .*agent_loop.*"
20// as necessary.
21func TestAgentLoop(t *testing.T) {
22 ctx := context.Background()
23
24 // Setup httprr recorder
25 rrPath := "testdata/agent_loop.httprr"
26 rr, err := httprr.Open(rrPath, http.DefaultTransport)
27 if err != nil && !os.IsNotExist(err) {
28 t.Fatal(err)
29 }
30
31 if rr.Recording() {
32 // Skip the test if API key is not available
33 if os.Getenv("ANTHROPIC_API_KEY") == "" {
34 t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
35 }
36 }
37
38 // Create HTTP client
39 var client *http.Client
40 if rr != nil {
41 // Scrub API keys from requests for security
42 rr.ScrubReq(func(req *http.Request) error {
43 req.Header.Del("x-api-key")
44 req.Header.Del("anthropic-api-key")
45 return nil
46 })
47 client = rr.Client()
48 } else {
49 client = &http.Client{Transport: http.DefaultTransport}
50 }
51
52 // Create a new agent with the httprr client
53 origWD, err := os.Getwd()
54 if err != nil {
55 t.Fatal(err)
56 }
57 if err := os.Chdir("/"); err != nil {
58 t.Fatal(err)
59 }
60 budget := ant.Budget{MaxResponses: 100}
61 wd, err := os.Getwd()
62 if err != nil {
63 t.Fatal(err)
64 }
65
66 cfg := AgentConfig{
67 Context: ctx,
68 APIKey: os.Getenv("ANTHROPIC_API_KEY"),
69 HTTPC: client,
70 Budget: budget,
71 GitUsername: "Test Agent",
72 GitEmail: "totallyhuman@sketch.dev",
73 SessionID: "test-session-id",
74 ClientGOOS: "linux",
75 ClientGOARCH: "amd64",
76 }
77 agent := NewAgent(cfg)
78 if err := os.Chdir(origWD); err != nil {
79 t.Fatal(err)
80 }
81 err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
82 if err != nil {
83 t.Fatal(err)
84 }
85
86 // Setup a test message that will trigger a simple, predictable response
87 userMessage := "What tools are available to you? Please just list them briefly."
88
89 // Send the message to the agent
90 agent.UserMessage(ctx, userMessage)
91
92 // Process a single loop iteration to avoid long-running tests
Sean McCullough885a16a2025-04-30 02:49:25 +000093 agent.processTurn(ctx)
Earl Lee2e463fb2025-04-17 11:22:22 -070094
95 // Collect responses with a timeout
96 var responses []AgentMessage
Philip Zeyligerb7c58752025-05-01 10:10:17 -070097 ctx2, _ := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
Earl Lee2e463fb2025-04-17 11:22:22 -070098 done := false
Philip Zeyligerb7c58752025-05-01 10:10:17 -070099 it := agent.NewIterator(ctx2, 0)
Earl Lee2e463fb2025-04-17 11:22:22 -0700100
101 for !done {
Philip Zeyligerb7c58752025-05-01 10:10:17 -0700102 msg := it.Next()
103 t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
104 responses = append(responses, *msg)
105 if msg.EndOfTurn {
Earl Lee2e463fb2025-04-17 11:22:22 -0700106 done = true
Earl Lee2e463fb2025-04-17 11:22:22 -0700107 }
108 }
109
110 // Verify we got at least one response
111 if len(responses) == 0 {
112 t.Fatal("No responses received from agent")
113 }
114
115 // Log the received responses for debugging
116 t.Logf("Received %d responses", len(responses))
117
118 // Find the final agent response (with EndOfTurn=true)
119 var finalResponse *AgentMessage
120 for i := range responses {
121 if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
122 finalResponse = &responses[i]
123 break
124 }
125 }
126
127 // Verify we got a final agent response
128 if finalResponse == nil {
129 t.Fatal("No final agent response received")
130 }
131
132 // Check that the response contains tools information
133 if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
134 t.Error("Expected response to mention tools")
135 }
136
137 // Count how many tool use messages we received
138 toolUseCount := 0
139 for _, msg := range responses {
140 if msg.Type == ToolUseMessageType {
141 toolUseCount++
142 }
143 }
144
145 t.Logf("Agent used %d tools in its response", toolUseCount)
146}
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000147
148func TestAgentTracksOutstandingCalls(t *testing.T) {
149 agent := &Agent{
150 outstandingLLMCalls: make(map[string]struct{}),
151 outstandingToolCalls: make(map[string]string),
Sean McCullough96b60dd2025-04-30 09:49:10 -0700152 stateMachine: NewStateMachine(),
Philip Zeyliger99a9a022025-04-27 15:15:25 +0000153 }
154
155 // Check initial state
156 if count := agent.OutstandingLLMCallCount(); count != 0 {
157 t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
158 }
159
160 if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
161 t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
162 }
163
164 // Add some calls
165 agent.mu.Lock()
166 agent.outstandingLLMCalls["llm1"] = struct{}{}
167 agent.outstandingToolCalls["tool1"] = "bash"
168 agent.outstandingToolCalls["tool2"] = "think"
169 agent.mu.Unlock()
170
171 // Check tracking works
172 if count := agent.OutstandingLLMCallCount(); count != 1 {
173 t.Errorf("Expected 1 outstanding LLM call, got %d", count)
174 }
175
176 tools := agent.OutstandingToolCalls()
177 if len(tools) != 2 {
178 t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
179 }
180
181 // Check removal
182 agent.mu.Lock()
183 delete(agent.outstandingLLMCalls, "llm1")
184 delete(agent.outstandingToolCalls, "tool1")
185 agent.mu.Unlock()
186
187 if count := agent.OutstandingLLMCallCount(); count != 0 {
188 t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
189 }
190
191 tools = agent.OutstandingToolCalls()
192 if len(tools) != 1 {
193 t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
194 }
195
196 if tools[0] != "think" {
197 t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
198 }
199}
Sean McCullough9f4b8082025-04-30 17:34:07 +0000200
201// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
202// a nil value for initialResp from processUserMessage.
203func TestAgentProcessTurnWithNilResponse(t *testing.T) {
204 // Create a mock conversation that will return nil and error
205 mockConvo := &MockConvoInterface{
206 sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
207 return nil, fmt.Errorf("test error: simulating nil response")
208 },
209 }
210
211 // Create a minimal Agent instance for testing
212 agent := &Agent{
213 convo: mockConvo,
214 inbox: make(chan string, 10),
215 outbox: make(chan AgentMessage, 10),
216 outstandingLLMCalls: make(map[string]struct{}),
217 outstandingToolCalls: make(map[string]string),
218 }
219
220 // Create a test context
221 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
222 defer cancel()
223
224 // Push a test message to the inbox so that processUserMessage will try to process it
225 agent.inbox <- "Test message"
226
227 // Call processTurn - it should exit early without panic when initialResp is nil
228 agent.processTurn(ctx)
229
230 // Verify the error message was added to outbox
231 select {
232 case msg := <-agent.outbox:
233 if msg.Type != ErrorMessageType {
234 t.Errorf("Expected error message, got message type: %s", msg.Type)
235 }
236 if !strings.Contains(msg.Content, "simulating nil response") {
237 t.Errorf("Expected error message to contain 'simulating nil response', got: %s", msg.Content)
238 }
239 case <-time.After(time.Second):
240 t.Error("Timed out waiting for error message in outbox")
241 }
242
243 // No more messages should be in the outbox since processTurn should exit early
244 select {
245 case msg := <-agent.outbox:
246 t.Errorf("Expected no more messages in outbox, but got: %+v", msg)
247 case <-time.After(100 * time.Millisecond):
248 // This is the expected outcome - no more messages
249 }
250}
251
252// MockConvoInterface implements the ConvoInterface for testing
253type MockConvoInterface struct {
254 sendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
255 sendUserTextMessageFunc func(s string, otherContents ...ant.Content) (*ant.MessageResponse, error)
256 toolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
257 toolResultCancelContentsFunc func(resp *ant.MessageResponse) ([]ant.Content, error)
258 cancelToolUseFunc func(toolUseID string, cause error) error
259 cumulativeUsageFunc func() ant.CumulativeUsage
260 resetBudgetFunc func(ant.Budget)
261 overBudgetFunc func() error
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700262 getIDFunc func() string
263 subConvoWithHistoryFunc func() *ant.Convo
Sean McCullough9f4b8082025-04-30 17:34:07 +0000264}
265
266func (m *MockConvoInterface) SendMessage(message ant.Message) (*ant.MessageResponse, error) {
267 if m.sendMessageFunc != nil {
268 return m.sendMessageFunc(message)
269 }
270 return nil, nil
271}
272
273func (m *MockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (*ant.MessageResponse, error) {
274 if m.sendUserTextMessageFunc != nil {
275 return m.sendUserTextMessageFunc(s, otherContents...)
276 }
277 return nil, nil
278}
279
280func (m *MockConvoInterface) ToolResultContents(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
281 if m.toolResultContentsFunc != nil {
282 return m.toolResultContentsFunc(ctx, resp)
283 }
284 return nil, nil
285}
286
287func (m *MockConvoInterface) ToolResultCancelContents(resp *ant.MessageResponse) ([]ant.Content, error) {
288 if m.toolResultCancelContentsFunc != nil {
289 return m.toolResultCancelContentsFunc(resp)
290 }
291 return nil, nil
292}
293
294func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
295 if m.cancelToolUseFunc != nil {
296 return m.cancelToolUseFunc(toolUseID, cause)
297 }
298 return nil
299}
300
301func (m *MockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
302 if m.cumulativeUsageFunc != nil {
303 return m.cumulativeUsageFunc()
304 }
305 return ant.CumulativeUsage{}
306}
307
308func (m *MockConvoInterface) ResetBudget(budget ant.Budget) {
309 if m.resetBudgetFunc != nil {
310 m.resetBudgetFunc(budget)
311 }
312}
313
314func (m *MockConvoInterface) OverBudget() error {
315 if m.overBudgetFunc != nil {
316 return m.overBudgetFunc()
317 }
318 return nil
319}
320
Philip Zeyliger2c4db092025-04-28 16:57:50 -0700321func (m *MockConvoInterface) GetID() string {
322 if m.getIDFunc != nil {
323 return m.getIDFunc()
324 }
325 return "mock-convo-id"
326}
327
328func (m *MockConvoInterface) SubConvoWithHistory() *ant.Convo {
329 if m.subConvoWithHistoryFunc != nil {
330 return m.subConvoWithHistoryFunc()
331 }
332 return nil
333}
334
Sean McCullough9f4b8082025-04-30 17:34:07 +0000335// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
336// a nil value for initialResp and nil error from processUserMessage.
337// This test verifies that the implementation properly handles this edge case.
338func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
339 // Create a mock conversation that will return nil response and nil error
340 mockConvo := &MockConvoInterface{
341 sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
342 return nil, nil // This is unusual but now handled gracefully
343 },
344 }
345
346 // Create a minimal Agent instance for testing
347 agent := &Agent{
348 convo: mockConvo,
349 inbox: make(chan string, 10),
350 outbox: make(chan AgentMessage, 10),
351 outstandingLLMCalls: make(map[string]struct{}),
352 outstandingToolCalls: make(map[string]string),
353 }
354
355 // Create a test context
356 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
357 defer cancel()
358
359 // Push a test message to the inbox so that processUserMessage will try to process it
360 agent.inbox <- "Test message"
361
362 // Call processTurn - it should handle nil initialResp with a descriptive error
363 err := agent.processTurn(ctx)
364
365 // Verify we get the expected error
366 if err == nil {
367 t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
368 } else if !strings.Contains(err.Error(), "unexpected nil response") {
369 t.Errorf("Expected error about nil response, got: %v", err)
370 } else {
371 t.Logf("As expected, processTurn returned error: %v", err)
372 }
373
374 // Verify an error message was sent to the outbox
375 select {
376 case msg := <-agent.outbox:
377 if msg.Type != ErrorMessageType {
378 t.Errorf("Expected error message type, got: %s", msg.Type)
379 }
380 if !strings.Contains(msg.Content, "unexpected nil response") {
381 t.Errorf("Expected error about nil response, got: %s", msg.Content)
382 }
383 case <-time.After(time.Second):
384 t.Error("Timed out waiting for error message in outbox")
385 }
386}
Sean McCullough96b60dd2025-04-30 09:49:10 -0700387
388func TestAgentStateMachine(t *testing.T) {
389 // Create a simplified test for the state machine functionality
390 agent := &Agent{
391 stateMachine: NewStateMachine(),
392 }
393
394 // Initially the state should be Ready
395 if state := agent.CurrentState(); state != StateReady {
396 t.Errorf("Expected initial state to be StateReady, got %s", state)
397 }
398
399 // Test manual transitions to verify state tracking
400 ctx := context.Background()
401
402 // Track transitions
403 var transitions []State
404 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
405 transitions = append(transitions, to)
406 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
407 })
408
409 // Perform a valid sequence of transitions (based on the state machine rules)
410 expectedStates := []State{
411 StateWaitingForUserInput,
412 StateSendingToLLM,
413 StateProcessingLLMResponse,
414 StateToolUseRequested,
415 StateCheckingForCancellation,
416 StateRunningTool,
417 StateCheckingGitCommits,
418 StateRunningAutoformatters,
419 StateCheckingBudget,
420 StateGatheringAdditionalMessages,
421 StateSendingToolResults,
422 StateProcessingLLMResponse,
423 StateEndOfTurn,
424 }
425
426 // Manually perform each transition
427 for _, state := range expectedStates {
428 err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
429 if err != nil {
430 t.Errorf("Failed to transition to %s: %v", state, err)
431 }
432 }
433
434 // Check if we recorded the right number of transitions
435 if len(transitions) != len(expectedStates) {
436 t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
437 }
438
439 // Check each transition matched what we expected
440 for i, expected := range expectedStates {
441 if i < len(transitions) {
442 if transitions[i] != expected {
443 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
444 }
445 }
446 }
447
448 // Verify the current state is the last one we transitioned to
449 if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
450 t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
451 }
452
453 // Test force transition
454 agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
455
456 // Verify current state was updated
457 if state := agent.CurrentState(); state != StateCancelled {
458 t.Errorf("Expected forced state to be StateCancelled, got %s", state)
459 }
460}
461
462// mockConvoInterface is a mock implementation of ConvoInterface for testing
463type mockConvoInterface struct {
464 SendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
465 ToolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
466}
467
468func (c *mockConvoInterface) GetID() string {
469 return "mockConvoInterface-id"
470}
471
472func (c *mockConvoInterface) SubConvoWithHistory() *ant.Convo {
473 return nil
474}
475
476func (m *mockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
477 return ant.CumulativeUsage{}
478}
479
480func (m *mockConvoInterface) ResetBudget(ant.Budget) {}
481
482func (m *mockConvoInterface) OverBudget() error {
483 return nil
484}
485
486func (m *mockConvoInterface) SendMessage(message ant.Message) (*ant.MessageResponse, error) {
487 if m.SendMessageFunc != nil {
488 return m.SendMessageFunc(message)
489 }
490 return &ant.MessageResponse{StopReason: ant.StopReasonEndTurn}, nil
491}
492
493func (m *mockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (*ant.MessageResponse, error) {
494 return m.SendMessage(ant.Message{Role: "user", Content: []ant.Content{{Type: "text", Text: s}}})
495}
496
497func (m *mockConvoInterface) ToolResultContents(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
498 if m.ToolResultContentsFunc != nil {
499 return m.ToolResultContentsFunc(ctx, resp)
500 }
501 return []ant.Content{}, nil
502}
503
504func (m *mockConvoInterface) ToolResultCancelContents(resp *ant.MessageResponse) ([]ant.Content, error) {
505 return []ant.Content{{Type: "text", Text: "Tool use cancelled"}}, nil
506}
507
508func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
509 return nil
510}
511
512func TestAgentProcessTurnStateTransitions(t *testing.T) {
513 // Create a mock ConvoInterface for testing
514 mockConvo := &mockConvoInterface{}
515
516 // Use the testing context
517 ctx := t.Context()
518
519 // Create an agent with the state machine
520 agent := &Agent{
521 convo: mockConvo,
522 config: AgentConfig{Context: ctx},
523 inbox: make(chan string, 10),
524 outbox: make(chan AgentMessage, 10),
525 ready: make(chan struct{}),
526 seenCommits: make(map[string]bool),
527 outstandingLLMCalls: make(map[string]struct{}),
528 outstandingToolCalls: make(map[string]string),
529 stateMachine: NewStateMachine(),
530 startOfTurn: time.Now(),
531 }
532
533 // Verify initial state
534 if state := agent.CurrentState(); state != StateReady {
535 t.Errorf("Expected initial state to be StateReady, got %s", state)
536 }
537
538 // Add a message to the inbox so we don't block in GatherMessages
539 agent.inbox <- "Test message"
540
541 // Setup the mock to simulate a model response with end of turn
542 mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
543 return &ant.MessageResponse{
544 StopReason: ant.StopReasonEndTurn,
545 Content: []ant.Content{
546 {Type: "text", Text: "This is a test response"},
547 },
548 }, nil
549 }
550
551 // Track state transitions
552 var transitions []State
553 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
554 transitions = append(transitions, to)
555 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
556 })
557
558 // Process a turn, which should trigger state transitions
559 agent.processTurn(ctx)
560
561 // The minimum expected states for a simple end-of-turn response
562 minExpectedStates := []State{
563 StateWaitingForUserInput,
564 StateSendingToLLM,
565 StateProcessingLLMResponse,
566 StateEndOfTurn,
567 }
568
569 // Verify we have at least the minimum expected states
570 if len(transitions) < len(minExpectedStates) {
571 t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
572 }
573
574 // Check that the transitions follow the expected sequence
575 for i, expected := range minExpectedStates {
576 if i < len(transitions) {
577 if transitions[i] != expected {
578 t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
579 }
580 }
581 }
582
583 // Verify the final state is EndOfTurn
584 if state := agent.CurrentState(); state != StateEndOfTurn {
585 t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
586 }
587}
588
589func TestAgentProcessTurnWithToolUse(t *testing.T) {
590 // Create a mock ConvoInterface for testing
591 mockConvo := &mockConvoInterface{}
592
593 // Setup a test context
594 ctx := context.Background()
595
596 // Create an agent with the state machine
597 agent := &Agent{
598 convo: mockConvo,
599 config: AgentConfig{Context: ctx},
600 inbox: make(chan string, 10),
601 outbox: make(chan AgentMessage, 10),
602 ready: make(chan struct{}),
603 seenCommits: make(map[string]bool),
604 outstandingLLMCalls: make(map[string]struct{}),
605 outstandingToolCalls: make(map[string]string),
606 stateMachine: NewStateMachine(),
607 startOfTurn: time.Now(),
608 }
609
610 // Add a message to the inbox so we don't block in GatherMessages
611 agent.inbox <- "Test message"
612
613 // First response requests a tool
614 firstResponseDone := false
615 mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
616 if !firstResponseDone {
617 firstResponseDone = true
618 return &ant.MessageResponse{
619 StopReason: ant.StopReasonToolUse,
620 Content: []ant.Content{
621 {Type: "text", Text: "I'll use a tool"},
622 {Type: "tool_use", ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
623 },
624 }, nil
625 }
626 // Second response ends the turn
627 return &ant.MessageResponse{
628 StopReason: ant.StopReasonEndTurn,
629 Content: []ant.Content{
630 {Type: "text", Text: "Finished using the tool"},
631 },
632 }, nil
633 }
634
635 // Tool result content handler
636 mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
637 return []ant.Content{{Type: "text", Text: "Tool executed successfully"}}, nil
638 }
639
640 // Track state transitions
641 var transitions []State
642 agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
643 transitions = append(transitions, to)
644 t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
645 })
646
647 // Process a turn with tool use
648 agent.processTurn(ctx)
649
650 // Define expected states for a tool use flow
651 expectedToolStates := []State{
652 StateWaitingForUserInput,
653 StateSendingToLLM,
654 StateProcessingLLMResponse,
655 StateToolUseRequested,
656 StateCheckingForCancellation,
657 StateRunningTool,
658 }
659
660 // Verify that these states are present in order
661 for i, expectedState := range expectedToolStates {
662 if i >= len(transitions) {
663 t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
664 continue
665 }
666 if transitions[i] != expectedState {
667 t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
668 }
669 }
670
671 // Also verify we eventually reached EndOfTurn
672 if !slices.Contains(transitions, StateEndOfTurn) {
673 t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
674 }
675}