Blame - loop/agent_test.go - sketch

blob: c62fd21554b9e029e6e2d0f0428b07c74835b69d [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
				4	"context"
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	5	"fmt"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"net/http"
				7	"os"
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame^]	8	"slices"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	9	"strings"
				10	"testing"
				11	"time"
				12
				13	"sketch.dev/ant"
				14	"sketch.dev/httprr"
				15	)
				16
				17	// TestAgentLoop tests that the Agent loop functionality works correctly.
				18	// It uses the httprr package to record HTTP interactions for replay in tests.
				19	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				20	// as necessary.
				21	func TestAgentLoop(t *testing.T) {
				22	ctx := context.Background()
				23
				24	// Setup httprr recorder
				25	rrPath := "testdata/agent_loop.httprr"
				26	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				27	if err != nil && !os.IsNotExist(err) {
				28	t.Fatal(err)
				29	}
				30
				31	if rr.Recording() {
				32	// Skip the test if API key is not available
				33	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				34	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				35	}
				36	}
				37
				38	// Create HTTP client
				39	var client *http.Client
				40	if rr != nil {
				41	// Scrub API keys from requests for security
				42	rr.ScrubReq(func(req *http.Request) error {
				43	req.Header.Del("x-api-key")
				44	req.Header.Del("anthropic-api-key")
				45	return nil
				46	})
				47	client = rr.Client()
				48	} else {
				49	client = &http.Client{Transport: http.DefaultTransport}
				50	}
				51
				52	// Create a new agent with the httprr client
				53	origWD, err := os.Getwd()
				54	if err != nil {
				55	t.Fatal(err)
				56	}
				57	if err := os.Chdir("/"); err != nil {
				58	t.Fatal(err)
				59	}
				60	budget := ant.Budget{MaxResponses: 100}
				61	wd, err := os.Getwd()
				62	if err != nil {
				63	t.Fatal(err)
				64	}
				65
				66	cfg := AgentConfig{
				67	Context: ctx,
				68	APIKey: os.Getenv("ANTHROPIC_API_KEY"),
				69	HTTPC: client,
				70	Budget: budget,
				71	GitUsername: "Test Agent",
				72	GitEmail: "totallyhuman@sketch.dev",
				73	SessionID: "test-session-id",
				74	ClientGOOS: "linux",
				75	ClientGOARCH: "amd64",
				76	}
				77	agent := NewAgent(cfg)
				78	if err := os.Chdir(origWD); err != nil {
				79	t.Fatal(err)
				80	}
				81	err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
				82	if err != nil {
				83	t.Fatal(err)
				84	}
				85
				86	// Setup a test message that will trigger a simple, predictable response
				87	userMessage := "What tools are available to you? Please just list them briefly."
				88
				89	// Send the message to the agent
				90	agent.UserMessage(ctx, userMessage)
				91
				92	// Process a single loop iteration to avoid long-running tests
Sean McCullough	885a16a	2025-04-30 02:49:25 +0000	[diff] [blame]	93	agent.processTurn(ctx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	94
				95	// Collect responses with a timeout
				96	var responses []AgentMessage
				97	timeout := time.After(10 * time.Second)
				98	done := false
				99
				100	for !done {
				101	select {
				102	case <-timeout:
				103	t.Log("Timeout reached while waiting for agent responses")
				104	done = true
				105	default:
				106	select {
				107	case msg := <-agent.outbox:
				108	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				109	responses = append(responses, msg)
				110	if msg.EndOfTurn {
				111	done = true
				112	}
				113	default:
				114	// No more messages available right now
				115	time.Sleep(100 * time.Millisecond)
				116	}
				117	}
				118	}
				119
				120	// Verify we got at least one response
				121	if len(responses) == 0 {
				122	t.Fatal("No responses received from agent")
				123	}
				124
				125	// Log the received responses for debugging
				126	t.Logf("Received %d responses", len(responses))
				127
				128	// Find the final agent response (with EndOfTurn=true)
				129	var finalResponse *AgentMessage
				130	for i := range responses {
				131	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				132	finalResponse = &responses[i]
				133	break
				134	}
				135	}
				136
				137	// Verify we got a final agent response
				138	if finalResponse == nil {
				139	t.Fatal("No final agent response received")
				140	}
				141
				142	// Check that the response contains tools information
				143	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				144	t.Error("Expected response to mention tools")
				145	}
				146
				147	// Count how many tool use messages we received
				148	toolUseCount := 0
				149	for _, msg := range responses {
				150	if msg.Type == ToolUseMessageType {
				151	toolUseCount++
				152	}
				153	}
				154
				155	t.Logf("Agent used %d tools in its response", toolUseCount)
				156	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	157
				158	func TestAgentTracksOutstandingCalls(t *testing.T) {
				159	agent := &Agent{
				160	outstandingLLMCalls: make(map[string]struct{}),
				161	outstandingToolCalls: make(map[string]string),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame^]	162	stateMachine: NewStateMachine(),
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	163	}
				164
				165	// Check initial state
				166	if count := agent.OutstandingLLMCallCount(); count != 0 {
				167	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				168	}
				169
				170	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				171	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				172	}
				173
				174	// Add some calls
				175	agent.mu.Lock()
				176	agent.outstandingLLMCalls["llm1"] = struct{}{}
				177	agent.outstandingToolCalls["tool1"] = "bash"
				178	agent.outstandingToolCalls["tool2"] = "think"
				179	agent.mu.Unlock()
				180
				181	// Check tracking works
				182	if count := agent.OutstandingLLMCallCount(); count != 1 {
				183	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				184	}
				185
				186	tools := agent.OutstandingToolCalls()
				187	if len(tools) != 2 {
				188	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				189	}
				190
				191	// Check removal
				192	agent.mu.Lock()
				193	delete(agent.outstandingLLMCalls, "llm1")
				194	delete(agent.outstandingToolCalls, "tool1")
				195	agent.mu.Unlock()
				196
				197	if count := agent.OutstandingLLMCallCount(); count != 0 {
				198	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				199	}
				200
				201	tools = agent.OutstandingToolCalls()
				202	if len(tools) != 1 {
				203	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				204	}
				205
				206	if tools[0] != "think" {
				207	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				208	}
				209	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	210
				211	// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
				212	// a nil value for initialResp from processUserMessage.
				213	func TestAgentProcessTurnWithNilResponse(t *testing.T) {
				214	// Create a mock conversation that will return nil and error
				215	mockConvo := &MockConvoInterface{
				216	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				217	return nil, fmt.Errorf("test error: simulating nil response")
				218	},
				219	}
				220
				221	// Create a minimal Agent instance for testing
				222	agent := &Agent{
				223	convo: mockConvo,
				224	inbox: make(chan string, 10),
				225	outbox: make(chan AgentMessage, 10),
				226	outstandingLLMCalls: make(map[string]struct{}),
				227	outstandingToolCalls: make(map[string]string),
				228	}
				229
				230	// Create a test context
				231	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				232	defer cancel()
				233
				234	// Push a test message to the inbox so that processUserMessage will try to process it
				235	agent.inbox <- "Test message"
				236
				237	// Call processTurn - it should exit early without panic when initialResp is nil
				238	agent.processTurn(ctx)
				239
				240	// Verify the error message was added to outbox
				241	select {
				242	case msg := <-agent.outbox:
				243	if msg.Type != ErrorMessageType {
				244	t.Errorf("Expected error message, got message type: %s", msg.Type)
				245	}
				246	if !strings.Contains(msg.Content, "simulating nil response") {
				247	t.Errorf("Expected error message to contain 'simulating nil response', got: %s", msg.Content)
				248	}
				249	case <-time.After(time.Second):
				250	t.Error("Timed out waiting for error message in outbox")
				251	}
				252
				253	// No more messages should be in the outbox since processTurn should exit early
				254	select {
				255	case msg := <-agent.outbox:
				256	t.Errorf("Expected no more messages in outbox, but got: %+v", msg)
				257	case <-time.After(100 * time.Millisecond):
				258	// This is the expected outcome - no more messages
				259	}
				260	}
				261
				262	// MockConvoInterface implements the ConvoInterface for testing
				263	type MockConvoInterface struct {
				264	sendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				265	sendUserTextMessageFunc func(s string, otherContents ...ant.Content) (*ant.MessageResponse, error)
				266	toolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				267	toolResultCancelContentsFunc func(resp *ant.MessageResponse) ([]ant.Content, error)
				268	cancelToolUseFunc func(toolUseID string, cause error) error
				269	cumulativeUsageFunc func() ant.CumulativeUsage
				270	resetBudgetFunc func(ant.Budget)
				271	overBudgetFunc func() error
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	272	getIDFunc func() string
				273	subConvoWithHistoryFunc func() *ant.Convo
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	274	}
				275
				276	func (m MockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				277	if m.sendMessageFunc != nil {
				278	return m.sendMessageFunc(message)
				279	}
				280	return nil, nil
				281	}
				282
				283	func (m MockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				284	if m.sendUserTextMessageFunc != nil {
				285	return m.sendUserTextMessageFunc(s, otherContents...)
				286	}
				287	return nil, nil
				288	}
				289
				290	func (m MockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				291	if m.toolResultContentsFunc != nil {
				292	return m.toolResultContentsFunc(ctx, resp)
				293	}
				294	return nil, nil
				295	}
				296
				297	func (m MockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				298	if m.toolResultCancelContentsFunc != nil {
				299	return m.toolResultCancelContentsFunc(resp)
				300	}
				301	return nil, nil
				302	}
				303
				304	func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				305	if m.cancelToolUseFunc != nil {
				306	return m.cancelToolUseFunc(toolUseID, cause)
				307	}
				308	return nil
				309	}
				310
				311	func (m *MockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				312	if m.cumulativeUsageFunc != nil {
				313	return m.cumulativeUsageFunc()
				314	}
				315	return ant.CumulativeUsage{}
				316	}
				317
				318	func (m *MockConvoInterface) ResetBudget(budget ant.Budget) {
				319	if m.resetBudgetFunc != nil {
				320	m.resetBudgetFunc(budget)
				321	}
				322	}
				323
				324	func (m *MockConvoInterface) OverBudget() error {
				325	if m.overBudgetFunc != nil {
				326	return m.overBudgetFunc()
				327	}
				328	return nil
				329	}
				330
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	331	func (m *MockConvoInterface) GetID() string {
				332	if m.getIDFunc != nil {
				333	return m.getIDFunc()
				334	}
				335	return "mock-convo-id"
				336	}
				337
				338	func (m MockConvoInterface) SubConvoWithHistory() ant.Convo {
				339	if m.subConvoWithHistoryFunc != nil {
				340	return m.subConvoWithHistoryFunc()
				341	}
				342	return nil
				343	}
				344
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	345	// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
				346	// a nil value for initialResp and nil error from processUserMessage.
				347	// This test verifies that the implementation properly handles this edge case.
				348	func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
				349	// Create a mock conversation that will return nil response and nil error
				350	mockConvo := &MockConvoInterface{
				351	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				352	return nil, nil // This is unusual but now handled gracefully
				353	},
				354	}
				355
				356	// Create a minimal Agent instance for testing
				357	agent := &Agent{
				358	convo: mockConvo,
				359	inbox: make(chan string, 10),
				360	outbox: make(chan AgentMessage, 10),
				361	outstandingLLMCalls: make(map[string]struct{}),
				362	outstandingToolCalls: make(map[string]string),
				363	}
				364
				365	// Create a test context
				366	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				367	defer cancel()
				368
				369	// Push a test message to the inbox so that processUserMessage will try to process it
				370	agent.inbox <- "Test message"
				371
				372	// Call processTurn - it should handle nil initialResp with a descriptive error
				373	err := agent.processTurn(ctx)
				374
				375	// Verify we get the expected error
				376	if err == nil {
				377	t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
				378	} else if !strings.Contains(err.Error(), "unexpected nil response") {
				379	t.Errorf("Expected error about nil response, got: %v", err)
				380	} else {
				381	t.Logf("As expected, processTurn returned error: %v", err)
				382	}
				383
				384	// Verify an error message was sent to the outbox
				385	select {
				386	case msg := <-agent.outbox:
				387	if msg.Type != ErrorMessageType {
				388	t.Errorf("Expected error message type, got: %s", msg.Type)
				389	}
				390	if !strings.Contains(msg.Content, "unexpected nil response") {
				391	t.Errorf("Expected error about nil response, got: %s", msg.Content)
				392	}
				393	case <-time.After(time.Second):
				394	t.Error("Timed out waiting for error message in outbox")
				395	}
				396	}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame^]	397
				398	func TestAgentStateMachine(t *testing.T) {
				399	// Create a simplified test for the state machine functionality
				400	agent := &Agent{
				401	stateMachine: NewStateMachine(),
				402	}
				403
				404	// Initially the state should be Ready
				405	if state := agent.CurrentState(); state != StateReady {
				406	t.Errorf("Expected initial state to be StateReady, got %s", state)
				407	}
				408
				409	// Test manual transitions to verify state tracking
				410	ctx := context.Background()
				411
				412	// Track transitions
				413	var transitions []State
				414	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				415	transitions = append(transitions, to)
				416	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				417	})
				418
				419	// Perform a valid sequence of transitions (based on the state machine rules)
				420	expectedStates := []State{
				421	StateWaitingForUserInput,
				422	StateSendingToLLM,
				423	StateProcessingLLMResponse,
				424	StateToolUseRequested,
				425	StateCheckingForCancellation,
				426	StateRunningTool,
				427	StateCheckingGitCommits,
				428	StateRunningAutoformatters,
				429	StateCheckingBudget,
				430	StateGatheringAdditionalMessages,
				431	StateSendingToolResults,
				432	StateProcessingLLMResponse,
				433	StateEndOfTurn,
				434	}
				435
				436	// Manually perform each transition
				437	for _, state := range expectedStates {
				438	err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
				439	if err != nil {
				440	t.Errorf("Failed to transition to %s: %v", state, err)
				441	}
				442	}
				443
				444	// Check if we recorded the right number of transitions
				445	if len(transitions) != len(expectedStates) {
				446	t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
				447	}
				448
				449	// Check each transition matched what we expected
				450	for i, expected := range expectedStates {
				451	if i < len(transitions) {
				452	if transitions[i] != expected {
				453	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				454	}
				455	}
				456	}
				457
				458	// Verify the current state is the last one we transitioned to
				459	if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
				460	t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
				461	}
				462
				463	// Test force transition
				464	agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
				465
				466	// Verify current state was updated
				467	if state := agent.CurrentState(); state != StateCancelled {
				468	t.Errorf("Expected forced state to be StateCancelled, got %s", state)
				469	}
				470	}
				471
				472	// mockConvoInterface is a mock implementation of ConvoInterface for testing
				473	type mockConvoInterface struct {
				474	SendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				475	ToolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				476	}
				477
				478	func (c *mockConvoInterface) GetID() string {
				479	return "mockConvoInterface-id"
				480	}
				481
				482	func (c mockConvoInterface) SubConvoWithHistory() ant.Convo {
				483	return nil
				484	}
				485
				486	func (m *mockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				487	return ant.CumulativeUsage{}
				488	}
				489
				490	func (m *mockConvoInterface) ResetBudget(ant.Budget) {}
				491
				492	func (m *mockConvoInterface) OverBudget() error {
				493	return nil
				494	}
				495
				496	func (m mockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				497	if m.SendMessageFunc != nil {
				498	return m.SendMessageFunc(message)
				499	}
				500	return &ant.MessageResponse{StopReason: ant.StopReasonEndTurn}, nil
				501	}
				502
				503	func (m mockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				504	return m.SendMessage(ant.Message{Role: "user", Content: []ant.Content{{Type: "text", Text: s}}})
				505	}
				506
				507	func (m mockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				508	if m.ToolResultContentsFunc != nil {
				509	return m.ToolResultContentsFunc(ctx, resp)
				510	}
				511	return []ant.Content{}, nil
				512	}
				513
				514	func (m mockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				515	return []ant.Content{{Type: "text", Text: "Tool use cancelled"}}, nil
				516	}
				517
				518	func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				519	return nil
				520	}
				521
				522	func TestAgentProcessTurnStateTransitions(t *testing.T) {
				523	// Create a mock ConvoInterface for testing
				524	mockConvo := &mockConvoInterface{}
				525
				526	// Use the testing context
				527	ctx := t.Context()
				528
				529	// Create an agent with the state machine
				530	agent := &Agent{
				531	convo: mockConvo,
				532	config: AgentConfig{Context: ctx},
				533	inbox: make(chan string, 10),
				534	outbox: make(chan AgentMessage, 10),
				535	ready: make(chan struct{}),
				536	seenCommits: make(map[string]bool),
				537	outstandingLLMCalls: make(map[string]struct{}),
				538	outstandingToolCalls: make(map[string]string),
				539	stateMachine: NewStateMachine(),
				540	startOfTurn: time.Now(),
				541	}
				542
				543	// Verify initial state
				544	if state := agent.CurrentState(); state != StateReady {
				545	t.Errorf("Expected initial state to be StateReady, got %s", state)
				546	}
				547
				548	// Add a message to the inbox so we don't block in GatherMessages
				549	agent.inbox <- "Test message"
				550
				551	// Setup the mock to simulate a model response with end of turn
				552	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				553	return &ant.MessageResponse{
				554	StopReason: ant.StopReasonEndTurn,
				555	Content: []ant.Content{
				556	{Type: "text", Text: "This is a test response"},
				557	},
				558	}, nil
				559	}
				560
				561	// Track state transitions
				562	var transitions []State
				563	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				564	transitions = append(transitions, to)
				565	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				566	})
				567
				568	// Process a turn, which should trigger state transitions
				569	agent.processTurn(ctx)
				570
				571	// The minimum expected states for a simple end-of-turn response
				572	minExpectedStates := []State{
				573	StateWaitingForUserInput,
				574	StateSendingToLLM,
				575	StateProcessingLLMResponse,
				576	StateEndOfTurn,
				577	}
				578
				579	// Verify we have at least the minimum expected states
				580	if len(transitions) < len(minExpectedStates) {
				581	t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
				582	}
				583
				584	// Check that the transitions follow the expected sequence
				585	for i, expected := range minExpectedStates {
				586	if i < len(transitions) {
				587	if transitions[i] != expected {
				588	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				589	}
				590	}
				591	}
				592
				593	// Verify the final state is EndOfTurn
				594	if state := agent.CurrentState(); state != StateEndOfTurn {
				595	t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
				596	}
				597	}
				598
				599	func TestAgentProcessTurnWithToolUse(t *testing.T) {
				600	// Create a mock ConvoInterface for testing
				601	mockConvo := &mockConvoInterface{}
				602
				603	// Setup a test context
				604	ctx := context.Background()
				605
				606	// Create an agent with the state machine
				607	agent := &Agent{
				608	convo: mockConvo,
				609	config: AgentConfig{Context: ctx},
				610	inbox: make(chan string, 10),
				611	outbox: make(chan AgentMessage, 10),
				612	ready: make(chan struct{}),
				613	seenCommits: make(map[string]bool),
				614	outstandingLLMCalls: make(map[string]struct{}),
				615	outstandingToolCalls: make(map[string]string),
				616	stateMachine: NewStateMachine(),
				617	startOfTurn: time.Now(),
				618	}
				619
				620	// Add a message to the inbox so we don't block in GatherMessages
				621	agent.inbox <- "Test message"
				622
				623	// First response requests a tool
				624	firstResponseDone := false
				625	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				626	if !firstResponseDone {
				627	firstResponseDone = true
				628	return &ant.MessageResponse{
				629	StopReason: ant.StopReasonToolUse,
				630	Content: []ant.Content{
				631	{Type: "text", Text: "I'll use a tool"},
				632	{Type: "tool_use", ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
				633	},
				634	}, nil
				635	}
				636	// Second response ends the turn
				637	return &ant.MessageResponse{
				638	StopReason: ant.StopReasonEndTurn,
				639	Content: []ant.Content{
				640	{Type: "text", Text: "Finished using the tool"},
				641	},
				642	}, nil
				643	}
				644
				645	// Tool result content handler
				646	mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
				647	return []ant.Content{{Type: "text", Text: "Tool executed successfully"}}, nil
				648	}
				649
				650	// Track state transitions
				651	var transitions []State
				652	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				653	transitions = append(transitions, to)
				654	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				655	})
				656
				657	// Process a turn with tool use
				658	agent.processTurn(ctx)
				659
				660	// Define expected states for a tool use flow
				661	expectedToolStates := []State{
				662	StateWaitingForUserInput,
				663	StateSendingToLLM,
				664	StateProcessingLLMResponse,
				665	StateToolUseRequested,
				666	StateCheckingForCancellation,
				667	StateRunningTool,
				668	}
				669
				670	// Verify that these states are present in order
				671	for i, expectedState := range expectedToolStates {
				672	if i >= len(transitions) {
				673	t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
				674	continue
				675	}
				676	if transitions[i] != expectedState {
				677	t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
				678	}
				679	}
				680
				681	// Also verify we eventually reached EndOfTurn
				682	if !slices.Contains(transitions, StateEndOfTurn) {
				683	t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
				684	}
				685	}