Blame - loop/agent_test.go - sketch

blob: 9663e268d95c67a18ec38ccce131ca718f4aa6a8 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
				4	"context"
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	5	"fmt"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"net/http"
				7	"os"
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	8	"slices"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	9	"strings"
				10	"testing"
				11	"time"
				12
				13	"sketch.dev/ant"
				14	"sketch.dev/httprr"
				15	)
				16
				17	// TestAgentLoop tests that the Agent loop functionality works correctly.
				18	// It uses the httprr package to record HTTP interactions for replay in tests.
				19	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				20	// as necessary.
				21	func TestAgentLoop(t *testing.T) {
				22	ctx := context.Background()
				23
				24	// Setup httprr recorder
				25	rrPath := "testdata/agent_loop.httprr"
				26	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				27	if err != nil && !os.IsNotExist(err) {
				28	t.Fatal(err)
				29	}
				30
				31	if rr.Recording() {
				32	// Skip the test if API key is not available
				33	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				34	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				35	}
				36	}
				37
				38	// Create HTTP client
				39	var client *http.Client
				40	if rr != nil {
				41	// Scrub API keys from requests for security
				42	rr.ScrubReq(func(req *http.Request) error {
				43	req.Header.Del("x-api-key")
				44	req.Header.Del("anthropic-api-key")
				45	return nil
				46	})
				47	client = rr.Client()
				48	} else {
				49	client = &http.Client{Transport: http.DefaultTransport}
				50	}
				51
				52	// Create a new agent with the httprr client
				53	origWD, err := os.Getwd()
				54	if err != nil {
				55	t.Fatal(err)
				56	}
				57	if err := os.Chdir("/"); err != nil {
				58	t.Fatal(err)
				59	}
				60	budget := ant.Budget{MaxResponses: 100}
				61	wd, err := os.Getwd()
				62	if err != nil {
				63	t.Fatal(err)
				64	}
				65
				66	cfg := AgentConfig{
				67	Context: ctx,
				68	APIKey: os.Getenv("ANTHROPIC_API_KEY"),
				69	HTTPC: client,
				70	Budget: budget,
				71	GitUsername: "Test Agent",
				72	GitEmail: "totallyhuman@sketch.dev",
				73	SessionID: "test-session-id",
				74	ClientGOOS: "linux",
				75	ClientGOARCH: "amd64",
				76	}
				77	agent := NewAgent(cfg)
				78	if err := os.Chdir(origWD); err != nil {
				79	t.Fatal(err)
				80	}
				81	err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
				82	if err != nil {
				83	t.Fatal(err)
				84	}
				85
				86	// Setup a test message that will trigger a simple, predictable response
				87	userMessage := "What tools are available to you? Please just list them briefly."
				88
				89	// Send the message to the agent
				90	agent.UserMessage(ctx, userMessage)
				91
				92	// Process a single loop iteration to avoid long-running tests
Sean McCullough	885a16a	2025-04-30 02:49:25 +0000	[diff] [blame]	93	agent.processTurn(ctx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	94
				95	// Collect responses with a timeout
				96	var responses []AgentMessage
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame^]	97	ctx2, _ := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	98	done := false
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame^]	99	it := agent.NewIterator(ctx2, 0)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	100
				101	for !done {
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame^]	102	msg := it.Next()
				103	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				104	responses = append(responses, *msg)
				105	if msg.EndOfTurn {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	106	done = true
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	107	}
				108	}
				109
				110	// Verify we got at least one response
				111	if len(responses) == 0 {
				112	t.Fatal("No responses received from agent")
				113	}
				114
				115	// Log the received responses for debugging
				116	t.Logf("Received %d responses", len(responses))
				117
				118	// Find the final agent response (with EndOfTurn=true)
				119	var finalResponse *AgentMessage
				120	for i := range responses {
				121	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				122	finalResponse = &responses[i]
				123	break
				124	}
				125	}
				126
				127	// Verify we got a final agent response
				128	if finalResponse == nil {
				129	t.Fatal("No final agent response received")
				130	}
				131
				132	// Check that the response contains tools information
				133	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				134	t.Error("Expected response to mention tools")
				135	}
				136
				137	// Count how many tool use messages we received
				138	toolUseCount := 0
				139	for _, msg := range responses {
				140	if msg.Type == ToolUseMessageType {
				141	toolUseCount++
				142	}
				143	}
				144
				145	t.Logf("Agent used %d tools in its response", toolUseCount)
				146	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	147
				148	func TestAgentTracksOutstandingCalls(t *testing.T) {
				149	agent := &Agent{
				150	outstandingLLMCalls: make(map[string]struct{}),
				151	outstandingToolCalls: make(map[string]string),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	152	stateMachine: NewStateMachine(),
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	153	}
				154
				155	// Check initial state
				156	if count := agent.OutstandingLLMCallCount(); count != 0 {
				157	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				158	}
				159
				160	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				161	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				162	}
				163
				164	// Add some calls
				165	agent.mu.Lock()
				166	agent.outstandingLLMCalls["llm1"] = struct{}{}
				167	agent.outstandingToolCalls["tool1"] = "bash"
				168	agent.outstandingToolCalls["tool2"] = "think"
				169	agent.mu.Unlock()
				170
				171	// Check tracking works
				172	if count := agent.OutstandingLLMCallCount(); count != 1 {
				173	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				174	}
				175
				176	tools := agent.OutstandingToolCalls()
				177	if len(tools) != 2 {
				178	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				179	}
				180
				181	// Check removal
				182	agent.mu.Lock()
				183	delete(agent.outstandingLLMCalls, "llm1")
				184	delete(agent.outstandingToolCalls, "tool1")
				185	agent.mu.Unlock()
				186
				187	if count := agent.OutstandingLLMCallCount(); count != 0 {
				188	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				189	}
				190
				191	tools = agent.OutstandingToolCalls()
				192	if len(tools) != 1 {
				193	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				194	}
				195
				196	if tools[0] != "think" {
				197	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				198	}
				199	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	200
				201	// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
				202	// a nil value for initialResp from processUserMessage.
				203	func TestAgentProcessTurnWithNilResponse(t *testing.T) {
				204	// Create a mock conversation that will return nil and error
				205	mockConvo := &MockConvoInterface{
				206	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				207	return nil, fmt.Errorf("test error: simulating nil response")
				208	},
				209	}
				210
				211	// Create a minimal Agent instance for testing
				212	agent := &Agent{
				213	convo: mockConvo,
				214	inbox: make(chan string, 10),
				215	outbox: make(chan AgentMessage, 10),
				216	outstandingLLMCalls: make(map[string]struct{}),
				217	outstandingToolCalls: make(map[string]string),
				218	}
				219
				220	// Create a test context
				221	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				222	defer cancel()
				223
				224	// Push a test message to the inbox so that processUserMessage will try to process it
				225	agent.inbox <- "Test message"
				226
				227	// Call processTurn - it should exit early without panic when initialResp is nil
				228	agent.processTurn(ctx)
				229
				230	// Verify the error message was added to outbox
				231	select {
				232	case msg := <-agent.outbox:
				233	if msg.Type != ErrorMessageType {
				234	t.Errorf("Expected error message, got message type: %s", msg.Type)
				235	}
				236	if !strings.Contains(msg.Content, "simulating nil response") {
				237	t.Errorf("Expected error message to contain 'simulating nil response', got: %s", msg.Content)
				238	}
				239	case <-time.After(time.Second):
				240	t.Error("Timed out waiting for error message in outbox")
				241	}
				242
				243	// No more messages should be in the outbox since processTurn should exit early
				244	select {
				245	case msg := <-agent.outbox:
				246	t.Errorf("Expected no more messages in outbox, but got: %+v", msg)
				247	case <-time.After(100 * time.Millisecond):
				248	// This is the expected outcome - no more messages
				249	}
				250	}
				251
				252	// MockConvoInterface implements the ConvoInterface for testing
				253	type MockConvoInterface struct {
				254	sendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				255	sendUserTextMessageFunc func(s string, otherContents ...ant.Content) (*ant.MessageResponse, error)
				256	toolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				257	toolResultCancelContentsFunc func(resp *ant.MessageResponse) ([]ant.Content, error)
				258	cancelToolUseFunc func(toolUseID string, cause error) error
				259	cumulativeUsageFunc func() ant.CumulativeUsage
				260	resetBudgetFunc func(ant.Budget)
				261	overBudgetFunc func() error
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	262	getIDFunc func() string
				263	subConvoWithHistoryFunc func() *ant.Convo
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	264	}
				265
				266	func (m MockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				267	if m.sendMessageFunc != nil {
				268	return m.sendMessageFunc(message)
				269	}
				270	return nil, nil
				271	}
				272
				273	func (m MockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				274	if m.sendUserTextMessageFunc != nil {
				275	return m.sendUserTextMessageFunc(s, otherContents...)
				276	}
				277	return nil, nil
				278	}
				279
				280	func (m MockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				281	if m.toolResultContentsFunc != nil {
				282	return m.toolResultContentsFunc(ctx, resp)
				283	}
				284	return nil, nil
				285	}
				286
				287	func (m MockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				288	if m.toolResultCancelContentsFunc != nil {
				289	return m.toolResultCancelContentsFunc(resp)
				290	}
				291	return nil, nil
				292	}
				293
				294	func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				295	if m.cancelToolUseFunc != nil {
				296	return m.cancelToolUseFunc(toolUseID, cause)
				297	}
				298	return nil
				299	}
				300
				301	func (m *MockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				302	if m.cumulativeUsageFunc != nil {
				303	return m.cumulativeUsageFunc()
				304	}
				305	return ant.CumulativeUsage{}
				306	}
				307
				308	func (m *MockConvoInterface) ResetBudget(budget ant.Budget) {
				309	if m.resetBudgetFunc != nil {
				310	m.resetBudgetFunc(budget)
				311	}
				312	}
				313
				314	func (m *MockConvoInterface) OverBudget() error {
				315	if m.overBudgetFunc != nil {
				316	return m.overBudgetFunc()
				317	}
				318	return nil
				319	}
				320
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	321	func (m *MockConvoInterface) GetID() string {
				322	if m.getIDFunc != nil {
				323	return m.getIDFunc()
				324	}
				325	return "mock-convo-id"
				326	}
				327
				328	func (m MockConvoInterface) SubConvoWithHistory() ant.Convo {
				329	if m.subConvoWithHistoryFunc != nil {
				330	return m.subConvoWithHistoryFunc()
				331	}
				332	return nil
				333	}
				334
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	335	// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
				336	// a nil value for initialResp and nil error from processUserMessage.
				337	// This test verifies that the implementation properly handles this edge case.
				338	func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
				339	// Create a mock conversation that will return nil response and nil error
				340	mockConvo := &MockConvoInterface{
				341	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				342	return nil, nil // This is unusual but now handled gracefully
				343	},
				344	}
				345
				346	// Create a minimal Agent instance for testing
				347	agent := &Agent{
				348	convo: mockConvo,
				349	inbox: make(chan string, 10),
				350	outbox: make(chan AgentMessage, 10),
				351	outstandingLLMCalls: make(map[string]struct{}),
				352	outstandingToolCalls: make(map[string]string),
				353	}
				354
				355	// Create a test context
				356	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				357	defer cancel()
				358
				359	// Push a test message to the inbox so that processUserMessage will try to process it
				360	agent.inbox <- "Test message"
				361
				362	// Call processTurn - it should handle nil initialResp with a descriptive error
				363	err := agent.processTurn(ctx)
				364
				365	// Verify we get the expected error
				366	if err == nil {
				367	t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
				368	} else if !strings.Contains(err.Error(), "unexpected nil response") {
				369	t.Errorf("Expected error about nil response, got: %v", err)
				370	} else {
				371	t.Logf("As expected, processTurn returned error: %v", err)
				372	}
				373
				374	// Verify an error message was sent to the outbox
				375	select {
				376	case msg := <-agent.outbox:
				377	if msg.Type != ErrorMessageType {
				378	t.Errorf("Expected error message type, got: %s", msg.Type)
				379	}
				380	if !strings.Contains(msg.Content, "unexpected nil response") {
				381	t.Errorf("Expected error about nil response, got: %s", msg.Content)
				382	}
				383	case <-time.After(time.Second):
				384	t.Error("Timed out waiting for error message in outbox")
				385	}
				386	}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	387
				388	func TestAgentStateMachine(t *testing.T) {
				389	// Create a simplified test for the state machine functionality
				390	agent := &Agent{
				391	stateMachine: NewStateMachine(),
				392	}
				393
				394	// Initially the state should be Ready
				395	if state := agent.CurrentState(); state != StateReady {
				396	t.Errorf("Expected initial state to be StateReady, got %s", state)
				397	}
				398
				399	// Test manual transitions to verify state tracking
				400	ctx := context.Background()
				401
				402	// Track transitions
				403	var transitions []State
				404	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				405	transitions = append(transitions, to)
				406	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				407	})
				408
				409	// Perform a valid sequence of transitions (based on the state machine rules)
				410	expectedStates := []State{
				411	StateWaitingForUserInput,
				412	StateSendingToLLM,
				413	StateProcessingLLMResponse,
				414	StateToolUseRequested,
				415	StateCheckingForCancellation,
				416	StateRunningTool,
				417	StateCheckingGitCommits,
				418	StateRunningAutoformatters,
				419	StateCheckingBudget,
				420	StateGatheringAdditionalMessages,
				421	StateSendingToolResults,
				422	StateProcessingLLMResponse,
				423	StateEndOfTurn,
				424	}
				425
				426	// Manually perform each transition
				427	for _, state := range expectedStates {
				428	err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
				429	if err != nil {
				430	t.Errorf("Failed to transition to %s: %v", state, err)
				431	}
				432	}
				433
				434	// Check if we recorded the right number of transitions
				435	if len(transitions) != len(expectedStates) {
				436	t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
				437	}
				438
				439	// Check each transition matched what we expected
				440	for i, expected := range expectedStates {
				441	if i < len(transitions) {
				442	if transitions[i] != expected {
				443	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				444	}
				445	}
				446	}
				447
				448	// Verify the current state is the last one we transitioned to
				449	if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
				450	t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
				451	}
				452
				453	// Test force transition
				454	agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
				455
				456	// Verify current state was updated
				457	if state := agent.CurrentState(); state != StateCancelled {
				458	t.Errorf("Expected forced state to be StateCancelled, got %s", state)
				459	}
				460	}
				461
				462	// mockConvoInterface is a mock implementation of ConvoInterface for testing
				463	type mockConvoInterface struct {
				464	SendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				465	ToolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				466	}
				467
				468	func (c *mockConvoInterface) GetID() string {
				469	return "mockConvoInterface-id"
				470	}
				471
				472	func (c mockConvoInterface) SubConvoWithHistory() ant.Convo {
				473	return nil
				474	}
				475
				476	func (m *mockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				477	return ant.CumulativeUsage{}
				478	}
				479
				480	func (m *mockConvoInterface) ResetBudget(ant.Budget) {}
				481
				482	func (m *mockConvoInterface) OverBudget() error {
				483	return nil
				484	}
				485
				486	func (m mockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				487	if m.SendMessageFunc != nil {
				488	return m.SendMessageFunc(message)
				489	}
				490	return &ant.MessageResponse{StopReason: ant.StopReasonEndTurn}, nil
				491	}
				492
				493	func (m mockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				494	return m.SendMessage(ant.Message{Role: "user", Content: []ant.Content{{Type: "text", Text: s}}})
				495	}
				496
				497	func (m mockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				498	if m.ToolResultContentsFunc != nil {
				499	return m.ToolResultContentsFunc(ctx, resp)
				500	}
				501	return []ant.Content{}, nil
				502	}
				503
				504	func (m mockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				505	return []ant.Content{{Type: "text", Text: "Tool use cancelled"}}, nil
				506	}
				507
				508	func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				509	return nil
				510	}
				511
				512	func TestAgentProcessTurnStateTransitions(t *testing.T) {
				513	// Create a mock ConvoInterface for testing
				514	mockConvo := &mockConvoInterface{}
				515
				516	// Use the testing context
				517	ctx := t.Context()
				518
				519	// Create an agent with the state machine
				520	agent := &Agent{
				521	convo: mockConvo,
				522	config: AgentConfig{Context: ctx},
				523	inbox: make(chan string, 10),
				524	outbox: make(chan AgentMessage, 10),
				525	ready: make(chan struct{}),
				526	seenCommits: make(map[string]bool),
				527	outstandingLLMCalls: make(map[string]struct{}),
				528	outstandingToolCalls: make(map[string]string),
				529	stateMachine: NewStateMachine(),
				530	startOfTurn: time.Now(),
				531	}
				532
				533	// Verify initial state
				534	if state := agent.CurrentState(); state != StateReady {
				535	t.Errorf("Expected initial state to be StateReady, got %s", state)
				536	}
				537
				538	// Add a message to the inbox so we don't block in GatherMessages
				539	agent.inbox <- "Test message"
				540
				541	// Setup the mock to simulate a model response with end of turn
				542	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				543	return &ant.MessageResponse{
				544	StopReason: ant.StopReasonEndTurn,
				545	Content: []ant.Content{
				546	{Type: "text", Text: "This is a test response"},
				547	},
				548	}, nil
				549	}
				550
				551	// Track state transitions
				552	var transitions []State
				553	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				554	transitions = append(transitions, to)
				555	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				556	})
				557
				558	// Process a turn, which should trigger state transitions
				559	agent.processTurn(ctx)
				560
				561	// The minimum expected states for a simple end-of-turn response
				562	minExpectedStates := []State{
				563	StateWaitingForUserInput,
				564	StateSendingToLLM,
				565	StateProcessingLLMResponse,
				566	StateEndOfTurn,
				567	}
				568
				569	// Verify we have at least the minimum expected states
				570	if len(transitions) < len(minExpectedStates) {
				571	t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
				572	}
				573
				574	// Check that the transitions follow the expected sequence
				575	for i, expected := range minExpectedStates {
				576	if i < len(transitions) {
				577	if transitions[i] != expected {
				578	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				579	}
				580	}
				581	}
				582
				583	// Verify the final state is EndOfTurn
				584	if state := agent.CurrentState(); state != StateEndOfTurn {
				585	t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
				586	}
				587	}
				588
				589	func TestAgentProcessTurnWithToolUse(t *testing.T) {
				590	// Create a mock ConvoInterface for testing
				591	mockConvo := &mockConvoInterface{}
				592
				593	// Setup a test context
				594	ctx := context.Background()
				595
				596	// Create an agent with the state machine
				597	agent := &Agent{
				598	convo: mockConvo,
				599	config: AgentConfig{Context: ctx},
				600	inbox: make(chan string, 10),
				601	outbox: make(chan AgentMessage, 10),
				602	ready: make(chan struct{}),
				603	seenCommits: make(map[string]bool),
				604	outstandingLLMCalls: make(map[string]struct{}),
				605	outstandingToolCalls: make(map[string]string),
				606	stateMachine: NewStateMachine(),
				607	startOfTurn: time.Now(),
				608	}
				609
				610	// Add a message to the inbox so we don't block in GatherMessages
				611	agent.inbox <- "Test message"
				612
				613	// First response requests a tool
				614	firstResponseDone := false
				615	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				616	if !firstResponseDone {
				617	firstResponseDone = true
				618	return &ant.MessageResponse{
				619	StopReason: ant.StopReasonToolUse,
				620	Content: []ant.Content{
				621	{Type: "text", Text: "I'll use a tool"},
				622	{Type: "tool_use", ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
				623	},
				624	}, nil
				625	}
				626	// Second response ends the turn
				627	return &ant.MessageResponse{
				628	StopReason: ant.StopReasonEndTurn,
				629	Content: []ant.Content{
				630	{Type: "text", Text: "Finished using the tool"},
				631	},
				632	}, nil
				633	}
				634
				635	// Tool result content handler
				636	mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
				637	return []ant.Content{{Type: "text", Text: "Tool executed successfully"}}, nil
				638	}
				639
				640	// Track state transitions
				641	var transitions []State
				642	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				643	transitions = append(transitions, to)
				644	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				645	})
				646
				647	// Process a turn with tool use
				648	agent.processTurn(ctx)
				649
				650	// Define expected states for a tool use flow
				651	expectedToolStates := []State{
				652	StateWaitingForUserInput,
				653	StateSendingToLLM,
				654	StateProcessingLLMResponse,
				655	StateToolUseRequested,
				656	StateCheckingForCancellation,
				657	StateRunningTool,
				658	}
				659
				660	// Verify that these states are present in order
				661	for i, expectedState := range expectedToolStates {
				662	if i >= len(transitions) {
				663	t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
				664	continue
				665	}
				666	if transitions[i] != expectedState {
				667	t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
				668	}
				669	}
				670
				671	// Also verify we eventually reached EndOfTurn
				672	if !slices.Contains(transitions, StateEndOfTurn) {
				673	t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
				674	}
				675	}