Blame - loop/agent_test.go - sketch

blob: f1d5b51ace66ba99ba30fc322d68388b4238d3e1 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
				4	"context"
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	5	"fmt"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	6	"net/http"
				7	"os"
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	8	"slices"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	9	"strings"
				10	"testing"
				11	"time"
				12
				13	"sketch.dev/ant"
				14	"sketch.dev/httprr"
				15	)
				16
				17	// TestAgentLoop tests that the Agent loop functionality works correctly.
				18	// It uses the httprr package to record HTTP interactions for replay in tests.
				19	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				20	// as necessary.
				21	func TestAgentLoop(t *testing.T) {
				22	ctx := context.Background()
				23
				24	// Setup httprr recorder
				25	rrPath := "testdata/agent_loop.httprr"
				26	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				27	if err != nil && !os.IsNotExist(err) {
				28	t.Fatal(err)
				29	}
				30
				31	if rr.Recording() {
				32	// Skip the test if API key is not available
				33	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				34	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				35	}
				36	}
				37
				38	// Create HTTP client
				39	var client *http.Client
				40	if rr != nil {
				41	// Scrub API keys from requests for security
				42	rr.ScrubReq(func(req *http.Request) error {
				43	req.Header.Del("x-api-key")
				44	req.Header.Del("anthropic-api-key")
				45	return nil
				46	})
				47	client = rr.Client()
				48	} else {
				49	client = &http.Client{Transport: http.DefaultTransport}
				50	}
				51
				52	// Create a new agent with the httprr client
				53	origWD, err := os.Getwd()
				54	if err != nil {
				55	t.Fatal(err)
				56	}
				57	if err := os.Chdir("/"); err != nil {
				58	t.Fatal(err)
				59	}
				60	budget := ant.Budget{MaxResponses: 100}
				61	wd, err := os.Getwd()
				62	if err != nil {
				63	t.Fatal(err)
				64	}
				65
				66	cfg := AgentConfig{
				67	Context: ctx,
				68	APIKey: os.Getenv("ANTHROPIC_API_KEY"),
				69	HTTPC: client,
				70	Budget: budget,
				71	GitUsername: "Test Agent",
				72	GitEmail: "totallyhuman@sketch.dev",
				73	SessionID: "test-session-id",
				74	ClientGOOS: "linux",
				75	ClientGOARCH: "amd64",
				76	}
				77	agent := NewAgent(cfg)
				78	if err := os.Chdir(origWD); err != nil {
				79	t.Fatal(err)
				80	}
				81	err = agent.Init(AgentInit{WorkingDir: wd, NoGit: true})
				82	if err != nil {
				83	t.Fatal(err)
				84	}
				85
				86	// Setup a test message that will trigger a simple, predictable response
				87	userMessage := "What tools are available to you? Please just list them briefly."
				88
				89	// Send the message to the agent
				90	agent.UserMessage(ctx, userMessage)
				91
				92	// Process a single loop iteration to avoid long-running tests
Sean McCullough	885a16a	2025-04-30 02:49:25 +0000	[diff] [blame]	93	agent.processTurn(ctx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	94
				95	// Collect responses with a timeout
				96	var responses []AgentMessage
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	97	ctx2, cancel := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
				98	defer cancel()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	99	done := false
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	100	it := agent.NewIterator(ctx2, 0)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	101
				102	for !done {
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	103	msg := it.Next()
				104	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				105	responses = append(responses, *msg)
				106	if msg.EndOfTurn {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	107	done = true
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	108	}
				109	}
				110
				111	// Verify we got at least one response
				112	if len(responses) == 0 {
				113	t.Fatal("No responses received from agent")
				114	}
				115
				116	// Log the received responses for debugging
				117	t.Logf("Received %d responses", len(responses))
				118
				119	// Find the final agent response (with EndOfTurn=true)
				120	var finalResponse *AgentMessage
				121	for i := range responses {
				122	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				123	finalResponse = &responses[i]
				124	break
				125	}
				126	}
				127
				128	// Verify we got a final agent response
				129	if finalResponse == nil {
				130	t.Fatal("No final agent response received")
				131	}
				132
				133	// Check that the response contains tools information
				134	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				135	t.Error("Expected response to mention tools")
				136	}
				137
				138	// Count how many tool use messages we received
				139	toolUseCount := 0
				140	for _, msg := range responses {
				141	if msg.Type == ToolUseMessageType {
				142	toolUseCount++
				143	}
				144	}
				145
				146	t.Logf("Agent used %d tools in its response", toolUseCount)
				147	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	148
				149	func TestAgentTracksOutstandingCalls(t *testing.T) {
				150	agent := &Agent{
				151	outstandingLLMCalls: make(map[string]struct{}),
				152	outstandingToolCalls: make(map[string]string),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	153	stateMachine: NewStateMachine(),
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	154	}
				155
				156	// Check initial state
				157	if count := agent.OutstandingLLMCallCount(); count != 0 {
				158	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				159	}
				160
				161	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				162	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				163	}
				164
				165	// Add some calls
				166	agent.mu.Lock()
				167	agent.outstandingLLMCalls["llm1"] = struct{}{}
				168	agent.outstandingToolCalls["tool1"] = "bash"
				169	agent.outstandingToolCalls["tool2"] = "think"
				170	agent.mu.Unlock()
				171
				172	// Check tracking works
				173	if count := agent.OutstandingLLMCallCount(); count != 1 {
				174	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				175	}
				176
				177	tools := agent.OutstandingToolCalls()
				178	if len(tools) != 2 {
				179	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				180	}
				181
				182	// Check removal
				183	agent.mu.Lock()
				184	delete(agent.outstandingLLMCalls, "llm1")
				185	delete(agent.outstandingToolCalls, "tool1")
				186	agent.mu.Unlock()
				187
				188	if count := agent.OutstandingLLMCallCount(); count != 0 {
				189	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				190	}
				191
				192	tools = agent.OutstandingToolCalls()
				193	if len(tools) != 1 {
				194	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				195	}
				196
				197	if tools[0] != "think" {
				198	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				199	}
				200	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	201
				202	// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
				203	// a nil value for initialResp from processUserMessage.
				204	func TestAgentProcessTurnWithNilResponse(t *testing.T) {
				205	// Create a mock conversation that will return nil and error
				206	mockConvo := &MockConvoInterface{
				207	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				208	return nil, fmt.Errorf("test error: simulating nil response")
				209	},
				210	}
				211
				212	// Create a minimal Agent instance for testing
				213	agent := &Agent{
				214	convo: mockConvo,
				215	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	216	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	217	outstandingLLMCalls: make(map[string]struct{}),
				218	outstandingToolCalls: make(map[string]string),
				219	}
				220
				221	// Create a test context
				222	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				223	defer cancel()
				224
				225	// Push a test message to the inbox so that processUserMessage will try to process it
				226	agent.inbox <- "Test message"
				227
				228	// Call processTurn - it should exit early without panic when initialResp is nil
				229	agent.processTurn(ctx)
				230
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	231	// Verify error message was added to history
				232	agent.mu.Lock()
				233	defer agent.mu.Unlock()
				234
				235	// There should be exactly one message
				236	if len(agent.history) != 1 {
				237	t.Errorf("Expected exactly one message, got %d", len(agent.history))
				238	} else {
				239	msg := agent.history[0]
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	240	if msg.Type != ErrorMessageType {
				241	t.Errorf("Expected error message, got message type: %s", msg.Type)
				242	}
				243	if !strings.Contains(msg.Content, "simulating nil response") {
				244	t.Errorf("Expected error message to contain 'simulating nil response', got: %s", msg.Content)
				245	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	246	}
				247	}
				248
				249	// MockConvoInterface implements the ConvoInterface for testing
				250	type MockConvoInterface struct {
				251	sendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				252	sendUserTextMessageFunc func(s string, otherContents ...ant.Content) (*ant.MessageResponse, error)
				253	toolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				254	toolResultCancelContentsFunc func(resp *ant.MessageResponse) ([]ant.Content, error)
				255	cancelToolUseFunc func(toolUseID string, cause error) error
				256	cumulativeUsageFunc func() ant.CumulativeUsage
				257	resetBudgetFunc func(ant.Budget)
				258	overBudgetFunc func() error
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	259	getIDFunc func() string
				260	subConvoWithHistoryFunc func() *ant.Convo
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	261	}
				262
				263	func (m MockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				264	if m.sendMessageFunc != nil {
				265	return m.sendMessageFunc(message)
				266	}
				267	return nil, nil
				268	}
				269
				270	func (m MockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				271	if m.sendUserTextMessageFunc != nil {
				272	return m.sendUserTextMessageFunc(s, otherContents...)
				273	}
				274	return nil, nil
				275	}
				276
				277	func (m MockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				278	if m.toolResultContentsFunc != nil {
				279	return m.toolResultContentsFunc(ctx, resp)
				280	}
				281	return nil, nil
				282	}
				283
				284	func (m MockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				285	if m.toolResultCancelContentsFunc != nil {
				286	return m.toolResultCancelContentsFunc(resp)
				287	}
				288	return nil, nil
				289	}
				290
				291	func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				292	if m.cancelToolUseFunc != nil {
				293	return m.cancelToolUseFunc(toolUseID, cause)
				294	}
				295	return nil
				296	}
				297
				298	func (m *MockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				299	if m.cumulativeUsageFunc != nil {
				300	return m.cumulativeUsageFunc()
				301	}
				302	return ant.CumulativeUsage{}
				303	}
				304
				305	func (m *MockConvoInterface) ResetBudget(budget ant.Budget) {
				306	if m.resetBudgetFunc != nil {
				307	m.resetBudgetFunc(budget)
				308	}
				309	}
				310
				311	func (m *MockConvoInterface) OverBudget() error {
				312	if m.overBudgetFunc != nil {
				313	return m.overBudgetFunc()
				314	}
				315	return nil
				316	}
				317
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	318	func (m *MockConvoInterface) GetID() string {
				319	if m.getIDFunc != nil {
				320	return m.getIDFunc()
				321	}
				322	return "mock-convo-id"
				323	}
				324
				325	func (m MockConvoInterface) SubConvoWithHistory() ant.Convo {
				326	if m.subConvoWithHistoryFunc != nil {
				327	return m.subConvoWithHistoryFunc()
				328	}
				329	return nil
				330	}
				331
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	332	// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
				333	// a nil value for initialResp and nil error from processUserMessage.
				334	// This test verifies that the implementation properly handles this edge case.
				335	func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
				336	// Create a mock conversation that will return nil response and nil error
				337	mockConvo := &MockConvoInterface{
				338	sendMessageFunc: func(message ant.Message) (*ant.MessageResponse, error) {
				339	return nil, nil // This is unusual but now handled gracefully
				340	},
				341	}
				342
				343	// Create a minimal Agent instance for testing
				344	agent := &Agent{
				345	convo: mockConvo,
				346	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	347	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	348	outstandingLLMCalls: make(map[string]struct{}),
				349	outstandingToolCalls: make(map[string]string),
				350	}
				351
				352	// Create a test context
				353	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				354	defer cancel()
				355
				356	// Push a test message to the inbox so that processUserMessage will try to process it
				357	agent.inbox <- "Test message"
				358
				359	// Call processTurn - it should handle nil initialResp with a descriptive error
				360	err := agent.processTurn(ctx)
				361
				362	// Verify we get the expected error
				363	if err == nil {
				364	t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
				365	} else if !strings.Contains(err.Error(), "unexpected nil response") {
				366	t.Errorf("Expected error about nil response, got: %v", err)
				367	} else {
				368	t.Logf("As expected, processTurn returned error: %v", err)
				369	}
				370
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	371	// Verify error message was added to history
				372	agent.mu.Lock()
				373	defer agent.mu.Unlock()
				374
				375	// There should be exactly one message
				376	if len(agent.history) != 1 {
				377	t.Errorf("Expected exactly one message, got %d", len(agent.history))
				378	} else {
				379	msg := agent.history[0]
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	380	if msg.Type != ErrorMessageType {
				381	t.Errorf("Expected error message type, got: %s", msg.Type)
				382	}
				383	if !strings.Contains(msg.Content, "unexpected nil response") {
				384	t.Errorf("Expected error about nil response, got: %s", msg.Content)
				385	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	386	}
				387	}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	388
				389	func TestAgentStateMachine(t *testing.T) {
				390	// Create a simplified test for the state machine functionality
				391	agent := &Agent{
				392	stateMachine: NewStateMachine(),
				393	}
				394
				395	// Initially the state should be Ready
				396	if state := agent.CurrentState(); state != StateReady {
				397	t.Errorf("Expected initial state to be StateReady, got %s", state)
				398	}
				399
				400	// Test manual transitions to verify state tracking
				401	ctx := context.Background()
				402
				403	// Track transitions
				404	var transitions []State
				405	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				406	transitions = append(transitions, to)
				407	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				408	})
				409
				410	// Perform a valid sequence of transitions (based on the state machine rules)
				411	expectedStates := []State{
				412	StateWaitingForUserInput,
				413	StateSendingToLLM,
				414	StateProcessingLLMResponse,
				415	StateToolUseRequested,
				416	StateCheckingForCancellation,
				417	StateRunningTool,
				418	StateCheckingGitCommits,
				419	StateRunningAutoformatters,
				420	StateCheckingBudget,
				421	StateGatheringAdditionalMessages,
				422	StateSendingToolResults,
				423	StateProcessingLLMResponse,
				424	StateEndOfTurn,
				425	}
				426
				427	// Manually perform each transition
				428	for _, state := range expectedStates {
				429	err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
				430	if err != nil {
				431	t.Errorf("Failed to transition to %s: %v", state, err)
				432	}
				433	}
				434
				435	// Check if we recorded the right number of transitions
				436	if len(transitions) != len(expectedStates) {
				437	t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
				438	}
				439
				440	// Check each transition matched what we expected
				441	for i, expected := range expectedStates {
				442	if i < len(transitions) {
				443	if transitions[i] != expected {
				444	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				445	}
				446	}
				447	}
				448
				449	// Verify the current state is the last one we transitioned to
				450	if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
				451	t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
				452	}
				453
				454	// Test force transition
				455	agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
				456
				457	// Verify current state was updated
				458	if state := agent.CurrentState(); state != StateCancelled {
				459	t.Errorf("Expected forced state to be StateCancelled, got %s", state)
				460	}
				461	}
				462
				463	// mockConvoInterface is a mock implementation of ConvoInterface for testing
				464	type mockConvoInterface struct {
				465	SendMessageFunc func(message ant.Message) (*ant.MessageResponse, error)
				466	ToolResultContentsFunc func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error)
				467	}
				468
				469	func (c *mockConvoInterface) GetID() string {
				470	return "mockConvoInterface-id"
				471	}
				472
				473	func (c mockConvoInterface) SubConvoWithHistory() ant.Convo {
				474	return nil
				475	}
				476
				477	func (m *mockConvoInterface) CumulativeUsage() ant.CumulativeUsage {
				478	return ant.CumulativeUsage{}
				479	}
				480
				481	func (m *mockConvoInterface) ResetBudget(ant.Budget) {}
				482
				483	func (m *mockConvoInterface) OverBudget() error {
				484	return nil
				485	}
				486
				487	func (m mockConvoInterface) SendMessage(message ant.Message) (ant.MessageResponse, error) {
				488	if m.SendMessageFunc != nil {
				489	return m.SendMessageFunc(message)
				490	}
				491	return &ant.MessageResponse{StopReason: ant.StopReasonEndTurn}, nil
				492	}
				493
				494	func (m mockConvoInterface) SendUserTextMessage(s string, otherContents ...ant.Content) (ant.MessageResponse, error) {
				495	return m.SendMessage(ant.Message{Role: "user", Content: []ant.Content{{Type: "text", Text: s}}})
				496	}
				497
				498	func (m mockConvoInterface) ToolResultContents(ctx context.Context, resp ant.MessageResponse) ([]ant.Content, error) {
				499	if m.ToolResultContentsFunc != nil {
				500	return m.ToolResultContentsFunc(ctx, resp)
				501	}
				502	return []ant.Content{}, nil
				503	}
				504
				505	func (m mockConvoInterface) ToolResultCancelContents(resp ant.MessageResponse) ([]ant.Content, error) {
				506	return []ant.Content{{Type: "text", Text: "Tool use cancelled"}}, nil
				507	}
				508
				509	func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				510	return nil
				511	}
				512
				513	func TestAgentProcessTurnStateTransitions(t *testing.T) {
				514	// Create a mock ConvoInterface for testing
				515	mockConvo := &mockConvoInterface{}
				516
				517	// Use the testing context
				518	ctx := t.Context()
				519
				520	// Create an agent with the state machine
				521	agent := &Agent{
				522	convo: mockConvo,
				523	config: AgentConfig{Context: ctx},
				524	inbox: make(chan string, 10),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	525	ready: make(chan struct{}),
				526	seenCommits: make(map[string]bool),
				527	outstandingLLMCalls: make(map[string]struct{}),
				528	outstandingToolCalls: make(map[string]string),
				529	stateMachine: NewStateMachine(),
				530	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	531	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	532	}
				533
				534	// Verify initial state
				535	if state := agent.CurrentState(); state != StateReady {
				536	t.Errorf("Expected initial state to be StateReady, got %s", state)
				537	}
				538
				539	// Add a message to the inbox so we don't block in GatherMessages
				540	agent.inbox <- "Test message"
				541
				542	// Setup the mock to simulate a model response with end of turn
				543	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				544	return &ant.MessageResponse{
				545	StopReason: ant.StopReasonEndTurn,
				546	Content: []ant.Content{
				547	{Type: "text", Text: "This is a test response"},
				548	},
				549	}, nil
				550	}
				551
				552	// Track state transitions
				553	var transitions []State
				554	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				555	transitions = append(transitions, to)
				556	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				557	})
				558
				559	// Process a turn, which should trigger state transitions
				560	agent.processTurn(ctx)
				561
				562	// The minimum expected states for a simple end-of-turn response
				563	minExpectedStates := []State{
				564	StateWaitingForUserInput,
				565	StateSendingToLLM,
				566	StateProcessingLLMResponse,
				567	StateEndOfTurn,
				568	}
				569
				570	// Verify we have at least the minimum expected states
				571	if len(transitions) < len(minExpectedStates) {
				572	t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
				573	}
				574
				575	// Check that the transitions follow the expected sequence
				576	for i, expected := range minExpectedStates {
				577	if i < len(transitions) {
				578	if transitions[i] != expected {
				579	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				580	}
				581	}
				582	}
				583
				584	// Verify the final state is EndOfTurn
				585	if state := agent.CurrentState(); state != StateEndOfTurn {
				586	t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
				587	}
				588	}
				589
				590	func TestAgentProcessTurnWithToolUse(t *testing.T) {
				591	// Create a mock ConvoInterface for testing
				592	mockConvo := &mockConvoInterface{}
				593
				594	// Setup a test context
				595	ctx := context.Background()
				596
				597	// Create an agent with the state machine
				598	agent := &Agent{
				599	convo: mockConvo,
				600	config: AgentConfig{Context: ctx},
				601	inbox: make(chan string, 10),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	602	ready: make(chan struct{}),
				603	seenCommits: make(map[string]bool),
				604	outstandingLLMCalls: make(map[string]struct{}),
				605	outstandingToolCalls: make(map[string]string),
				606	stateMachine: NewStateMachine(),
				607	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame^]	608	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	609	}
				610
				611	// Add a message to the inbox so we don't block in GatherMessages
				612	agent.inbox <- "Test message"
				613
				614	// First response requests a tool
				615	firstResponseDone := false
				616	mockConvo.SendMessageFunc = func(message ant.Message) (*ant.MessageResponse, error) {
				617	if !firstResponseDone {
				618	firstResponseDone = true
				619	return &ant.MessageResponse{
				620	StopReason: ant.StopReasonToolUse,
				621	Content: []ant.Content{
				622	{Type: "text", Text: "I'll use a tool"},
				623	{Type: "tool_use", ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
				624	},
				625	}, nil
				626	}
				627	// Second response ends the turn
				628	return &ant.MessageResponse{
				629	StopReason: ant.StopReasonEndTurn,
				630	Content: []ant.Content{
				631	{Type: "text", Text: "Finished using the tool"},
				632	},
				633	}, nil
				634	}
				635
				636	// Tool result content handler
				637	mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *ant.MessageResponse) ([]ant.Content, error) {
				638	return []ant.Content{{Type: "text", Text: "Tool executed successfully"}}, nil
				639	}
				640
				641	// Track state transitions
				642	var transitions []State
				643	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				644	transitions = append(transitions, to)
				645	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				646	})
				647
				648	// Process a turn with tool use
				649	agent.processTurn(ctx)
				650
				651	// Define expected states for a tool use flow
				652	expectedToolStates := []State{
				653	StateWaitingForUserInput,
				654	StateSendingToLLM,
				655	StateProcessingLLMResponse,
				656	StateToolUseRequested,
				657	StateCheckingForCancellation,
				658	StateRunningTool,
				659	}
				660
				661	// Verify that these states are present in order
				662	for i, expectedState := range expectedToolStates {
				663	if i >= len(transitions) {
				664	t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
				665	continue
				666	}
				667	if transitions[i] != expectedState {
				668	t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
				669	}
				670	}
				671
				672	// Also verify we eventually reached EndOfTurn
				673	if !slices.Contains(transitions, StateEndOfTurn) {
				674	t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
				675	}
				676	}