Blame - loop/agent_test.go - sketch

blob: 38422a3a482df341de8f42fc5098ba3396f10450 [file] [log] [blame]

Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	1	package loop
				2
				3	import (
Josh Bleecher Snyder	4d5e997	2025-05-01 15:56:37 -0700	[diff] [blame]	4	"cmp"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	5	"context"
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	6	"fmt"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	7	"net/http"
				8	"os"
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	9	"slices"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	10	"strings"
				11	"testing"
				12	"time"
				13
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	14	"sketch.dev/httprr"
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	15	"sketch.dev/llm"
				16	"sketch.dev/llm/ant"
				17	"sketch.dev/llm/conversation"
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	18	)
				19
				20	// TestAgentLoop tests that the Agent loop functionality works correctly.
				21	// It uses the httprr package to record HTTP interactions for replay in tests.
				22	// When failing, rebuild with "go test ./sketch/loop -run TestAgentLoop -httprecord .agent_loop."
				23	// as necessary.
				24	func TestAgentLoop(t *testing.T) {
				25	ctx := context.Background()
				26
				27	// Setup httprr recorder
				28	rrPath := "testdata/agent_loop.httprr"
				29	rr, err := httprr.Open(rrPath, http.DefaultTransport)
				30	if err != nil && !os.IsNotExist(err) {
				31	t.Fatal(err)
				32	}
				33
				34	if rr.Recording() {
				35	// Skip the test if API key is not available
				36	if os.Getenv("ANTHROPIC_API_KEY") == "" {
				37	t.Fatal("ANTHROPIC_API_KEY not set, required for HTTP recording")
				38	}
				39	}
				40
				41	// Create HTTP client
				42	var client *http.Client
				43	if rr != nil {
				44	// Scrub API keys from requests for security
				45	rr.ScrubReq(func(req *http.Request) error {
				46	req.Header.Del("x-api-key")
				47	req.Header.Del("anthropic-api-key")
				48	return nil
				49	})
				50	client = rr.Client()
				51	} else {
				52	client = &http.Client{Transport: http.DefaultTransport}
				53	}
				54
				55	// Create a new agent with the httprr client
				56	origWD, err := os.Getwd()
				57	if err != nil {
				58	t.Fatal(err)
				59	}
				60	if err := os.Chdir("/"); err != nil {
				61	t.Fatal(err)
				62	}
Philip Zeyliger	e6c294d	2025-06-04 16:55:21 +0000	[diff] [blame]	63	budget := conversation.Budget{MaxDollars: 10.0}
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	64	wd, err := os.Getwd()
				65	if err != nil {
				66	t.Fatal(err)
				67	}
				68
David Crawshaw	3659d87	2025-05-05 17:52:23 -0700	[diff] [blame]	69	apiKey := cmp.Or(os.Getenv("OUTER_SKETCH_MODEL_API_KEY"), os.Getenv("ANTHROPIC_API_KEY"))
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	70	cfg := AgentConfig{
Philip Zeyliger	bc8c8dc	2025-05-21 13:19:13 -0700	[diff] [blame]	71	Context: ctx,
				72	WorkingDir: wd,
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	73	Service: &ant.Service{
				74	APIKey: apiKey,
				75	HTTPC: client,
				76	},
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	77	Budget: budget,
				78	GitUsername: "Test Agent",
				79	GitEmail: "totallyhuman@sketch.dev",
				80	SessionID: "test-session-id",
				81	ClientGOOS: "linux",
				82	ClientGOARCH: "amd64",
				83	}
				84	agent := NewAgent(cfg)
				85	if err := os.Chdir(origWD); err != nil {
				86	t.Fatal(err)
				87	}
Philip Zeyliger	bc8c8dc	2025-05-21 13:19:13 -0700	[diff] [blame]	88	err = agent.Init(AgentInit{NoGit: true})
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	89	if err != nil {
				90	t.Fatal(err)
				91	}
				92
				93	// Setup a test message that will trigger a simple, predictable response
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	94	userMessage := "What tools are available to you? Please just list them briefly."
				95
				96	// Set a slug so that the agent doesn't have to.
				97	agent.SetSlug("list-available-tools")
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	98
				99	// Send the message to the agent
				100	agent.UserMessage(ctx, userMessage)
				101
				102	// Process a single loop iteration to avoid long-running tests
Sean McCullough	885a16a	2025-04-30 02:49:25 +0000	[diff] [blame]	103	agent.processTurn(ctx)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	104
				105	// Collect responses with a timeout
				106	var responses []AgentMessage
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	107	ctx2, cancel := context.WithDeadline(ctx, time.Now().Add(10*time.Second))
				108	defer cancel()
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	109	done := false
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	110	it := agent.NewIterator(ctx2, 0)
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	111
				112	for !done {
Philip Zeyliger	b7c5875	2025-05-01 10:10:17 -0700	[diff] [blame]	113	msg := it.Next()
				114	t.Logf("Received message: Type=%s, EndOfTurn=%v, Content=%q", msg.Type, msg.EndOfTurn, msg.Content)
				115	responses = append(responses, *msg)
				116	if msg.EndOfTurn {
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	117	done = true
Earl Lee	2e463fb	2025-04-17 11:22:22 -0700	[diff] [blame]	118	}
				119	}
				120
				121	// Verify we got at least one response
				122	if len(responses) == 0 {
				123	t.Fatal("No responses received from agent")
				124	}
				125
				126	// Log the received responses for debugging
				127	t.Logf("Received %d responses", len(responses))
				128
				129	// Find the final agent response (with EndOfTurn=true)
				130	var finalResponse *AgentMessage
				131	for i := range responses {
				132	if responses[i].Type == AgentMessageType && responses[i].EndOfTurn {
				133	finalResponse = &responses[i]
				134	break
				135	}
				136	}
				137
				138	// Verify we got a final agent response
				139	if finalResponse == nil {
				140	t.Fatal("No final agent response received")
				141	}
				142
				143	// Check that the response contains tools information
				144	if !strings.Contains(strings.ToLower(finalResponse.Content), "tool") {
				145	t.Error("Expected response to mention tools")
				146	}
				147
				148	// Count how many tool use messages we received
				149	toolUseCount := 0
				150	for _, msg := range responses {
				151	if msg.Type == ToolUseMessageType {
				152	toolUseCount++
				153	}
				154	}
				155
				156	t.Logf("Agent used %d tools in its response", toolUseCount)
				157	}
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	158
				159	func TestAgentTracksOutstandingCalls(t *testing.T) {
				160	agent := &Agent{
				161	outstandingLLMCalls: make(map[string]struct{}),
				162	outstandingToolCalls: make(map[string]string),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	163	stateMachine: NewStateMachine(),
Philip Zeyliger	99a9a02	2025-04-27 15:15:25 +0000	[diff] [blame]	164	}
				165
				166	// Check initial state
				167	if count := agent.OutstandingLLMCallCount(); count != 0 {
				168	t.Errorf("Expected 0 outstanding LLM calls, got %d", count)
				169	}
				170
				171	if tools := agent.OutstandingToolCalls(); len(tools) != 0 {
				172	t.Errorf("Expected 0 outstanding tool calls, got %d", len(tools))
				173	}
				174
				175	// Add some calls
				176	agent.mu.Lock()
				177	agent.outstandingLLMCalls["llm1"] = struct{}{}
				178	agent.outstandingToolCalls["tool1"] = "bash"
				179	agent.outstandingToolCalls["tool2"] = "think"
				180	agent.mu.Unlock()
				181
				182	// Check tracking works
				183	if count := agent.OutstandingLLMCallCount(); count != 1 {
				184	t.Errorf("Expected 1 outstanding LLM call, got %d", count)
				185	}
				186
				187	tools := agent.OutstandingToolCalls()
				188	if len(tools) != 2 {
				189	t.Errorf("Expected 2 outstanding tool calls, got %d", len(tools))
				190	}
				191
				192	// Check removal
				193	agent.mu.Lock()
				194	delete(agent.outstandingLLMCalls, "llm1")
				195	delete(agent.outstandingToolCalls, "tool1")
				196	agent.mu.Unlock()
				197
				198	if count := agent.OutstandingLLMCallCount(); count != 0 {
				199	t.Errorf("Expected 0 outstanding LLM calls after removal, got %d", count)
				200	}
				201
				202	tools = agent.OutstandingToolCalls()
				203	if len(tools) != 1 {
				204	t.Errorf("Expected 1 outstanding tool call after removal, got %d", len(tools))
				205	}
				206
				207	if tools[0] != "think" {
				208	t.Errorf("Expected 'think' tool remaining, got %s", tools[0])
				209	}
				210	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	211
				212	// TestAgentProcessTurnWithNilResponse tests the scenario where Agent.processTurn receives
				213	// a nil value for initialResp from processUserMessage.
				214	func TestAgentProcessTurnWithNilResponse(t *testing.T) {
				215	// Create a mock conversation that will return nil and error
				216	mockConvo := &MockConvoInterface{
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	217	sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	218	return nil, fmt.Errorf("test error: simulating nil response")
				219	},
				220	}
				221
				222	// Create a minimal Agent instance for testing
				223	agent := &Agent{
				224	convo: mockConvo,
				225	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	226	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	227	outstandingLLMCalls: make(map[string]struct{}),
				228	outstandingToolCalls: make(map[string]string),
				229	}
				230
				231	// Create a test context
				232	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				233	defer cancel()
				234
				235	// Push a test message to the inbox so that processUserMessage will try to process it
				236	agent.inbox <- "Test message"
				237
				238	// Call processTurn - it should exit early without panic when initialResp is nil
				239	agent.processTurn(ctx)
				240
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	241	// Verify error message was added to history
				242	agent.mu.Lock()
				243	defer agent.mu.Unlock()
				244
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	245	// There should be exactly two messages: slug + error
				246	if len(agent.history) != 2 {
				247	t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	248	} else {
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	249	slugMsg := agent.history[0]
				250	if slugMsg.Type != SlugMessageType {
				251	t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	252	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	253	errorMsg := agent.history[1]
				254	if errorMsg.Type != ErrorMessageType {
				255	t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
				256	}
				257	if !strings.Contains(errorMsg.Content, "simulating nil response") {
				258	t.Errorf("Expected error message to contain 'simulating nil response', got: %s", errorMsg.Content)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	259	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	260	}
				261	}
				262
				263	// MockConvoInterface implements the ConvoInterface for testing
				264	type MockConvoInterface struct {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	265	sendMessageFunc func(message llm.Message) (*llm.Response, error)
				266	sendUserTextMessageFunc func(s string, otherContents ...llm.Content) (*llm.Response, error)
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	267	toolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	268	toolResultCancelContentsFunc func(resp *llm.Response) ([]llm.Content, error)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	269	cancelToolUseFunc func(toolUseID string, cause error) error
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	270	cumulativeUsageFunc func() conversation.CumulativeUsage
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	271	lastUsageFunc func() llm.Usage
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	272	resetBudgetFunc func(conversation.Budget)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	273	overBudgetFunc func() error
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	274	getIDFunc func() string
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	275	subConvoWithHistoryFunc func() *conversation.Convo
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	276	debugJSONFunc func() ([]byte, error)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	277	}
				278
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	279	func (m MockConvoInterface) SendMessage(message llm.Message) (llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	280	if m.sendMessageFunc != nil {
				281	return m.sendMessageFunc(message)
				282	}
				283	return nil, nil
				284	}
				285
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	286	func (m MockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	287	if m.sendUserTextMessageFunc != nil {
				288	return m.sendUserTextMessageFunc(s, otherContents...)
				289	}
				290	return nil, nil
				291	}
				292
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	293	func (m MockConvoInterface) ToolResultContents(ctx context.Context, resp llm.Response) ([]llm.Content, bool, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	294	if m.toolResultContentsFunc != nil {
				295	return m.toolResultContentsFunc(ctx, resp)
				296	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	297	return nil, false, nil
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	298	}
				299
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	300	func (m MockConvoInterface) ToolResultCancelContents(resp llm.Response) ([]llm.Content, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	301	if m.toolResultCancelContentsFunc != nil {
				302	return m.toolResultCancelContentsFunc(resp)
				303	}
				304	return nil, nil
				305	}
				306
				307	func (m *MockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				308	if m.cancelToolUseFunc != nil {
				309	return m.cancelToolUseFunc(toolUseID, cause)
				310	}
				311	return nil
				312	}
				313
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	314	func (m *MockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	315	if m.cumulativeUsageFunc != nil {
				316	return m.cumulativeUsageFunc()
				317	}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	318	return conversation.CumulativeUsage{}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	319	}
				320
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	321	func (m *MockConvoInterface) LastUsage() llm.Usage {
				322	if m.lastUsageFunc != nil {
				323	return m.lastUsageFunc()
				324	}
				325	return llm.Usage{}
				326	}
				327
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	328	func (m *MockConvoInterface) ResetBudget(budget conversation.Budget) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	329	if m.resetBudgetFunc != nil {
				330	m.resetBudgetFunc(budget)
				331	}
				332	}
				333
				334	func (m *MockConvoInterface) OverBudget() error {
				335	if m.overBudgetFunc != nil {
				336	return m.overBudgetFunc()
				337	}
				338	return nil
				339	}
				340
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	341	func (m *MockConvoInterface) GetID() string {
				342	if m.getIDFunc != nil {
				343	return m.getIDFunc()
				344	}
				345	return "mock-convo-id"
				346	}
				347
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	348	func (m MockConvoInterface) SubConvoWithHistory() conversation.Convo {
Philip Zeyliger	2c4db09	2025-04-28 16:57:50 -0700	[diff] [blame]	349	if m.subConvoWithHistoryFunc != nil {
				350	return m.subConvoWithHistoryFunc()
				351	}
				352	return nil
				353	}
				354
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	355	func (m *MockConvoInterface) DebugJSON() ([]byte, error) {
				356	if m.debugJSONFunc != nil {
				357	return m.debugJSONFunc()
				358	}
				359	return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
				360	}
				361
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	362	// TestAgentProcessTurnWithNilResponseNilError tests the scenario where Agent.processTurn receives
				363	// a nil value for initialResp and nil error from processUserMessage.
				364	// This test verifies that the implementation properly handles this edge case.
				365	func TestAgentProcessTurnWithNilResponseNilError(t *testing.T) {
				366	// Create a mock conversation that will return nil response and nil error
				367	mockConvo := &MockConvoInterface{
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	368	sendMessageFunc: func(message llm.Message) (*llm.Response, error) {
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	369	return nil, nil // This is unusual but now handled gracefully
				370	},
				371	}
				372
				373	// Create a minimal Agent instance for testing
				374	agent := &Agent{
				375	convo: mockConvo,
				376	inbox: make(chan string, 10),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	377	subscribers: []chan *AgentMessage{},
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	378	outstandingLLMCalls: make(map[string]struct{}),
				379	outstandingToolCalls: make(map[string]string),
				380	}
				381
				382	// Create a test context
				383	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
				384	defer cancel()
				385
				386	// Push a test message to the inbox so that processUserMessage will try to process it
				387	agent.inbox <- "Test message"
				388
				389	// Call processTurn - it should handle nil initialResp with a descriptive error
				390	err := agent.processTurn(ctx)
				391
				392	// Verify we get the expected error
				393	if err == nil {
				394	t.Error("Expected processTurn to return an error for nil initialResp, but got nil")
				395	} else if !strings.Contains(err.Error(), "unexpected nil response") {
				396	t.Errorf("Expected error about nil response, got: %v", err)
				397	} else {
				398	t.Logf("As expected, processTurn returned error: %v", err)
				399	}
				400
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	401	// Verify error message was added to history
				402	agent.mu.Lock()
				403	defer agent.mu.Unlock()
				404
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	405	// There should be exactly two messages: slug + error
				406	if len(agent.history) != 2 {
				407	t.Errorf("Expected exactly two messages (slug + error), got %d", len(agent.history))
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	408	} else {
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	409	slugMsg := agent.history[0]
				410	if slugMsg.Type != SlugMessageType {
				411	t.Errorf("Expected first message to be slug, got message type: %s", slugMsg.Type)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	412	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	413	errorMsg := agent.history[1]
				414	if errorMsg.Type != ErrorMessageType {
				415	t.Errorf("Expected second message to be error, got message type: %s", errorMsg.Type)
				416	}
				417	if !strings.Contains(errorMsg.Content, "unexpected nil response") {
				418	t.Errorf("Expected error about nil response, got: %s", errorMsg.Content)
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	419	}
Sean McCullough	9f4b808	2025-04-30 17:34:07 +0000	[diff] [blame]	420	}
				421	}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	422
				423	func TestAgentStateMachine(t *testing.T) {
				424	// Create a simplified test for the state machine functionality
				425	agent := &Agent{
				426	stateMachine: NewStateMachine(),
				427	}
				428
				429	// Initially the state should be Ready
				430	if state := agent.CurrentState(); state != StateReady {
				431	t.Errorf("Expected initial state to be StateReady, got %s", state)
				432	}
				433
				434	// Test manual transitions to verify state tracking
				435	ctx := context.Background()
				436
				437	// Track transitions
				438	var transitions []State
				439	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				440	transitions = append(transitions, to)
				441	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				442	})
				443
				444	// Perform a valid sequence of transitions (based on the state machine rules)
				445	expectedStates := []State{
				446	StateWaitingForUserInput,
				447	StateSendingToLLM,
				448	StateProcessingLLMResponse,
				449	StateToolUseRequested,
				450	StateCheckingForCancellation,
				451	StateRunningTool,
				452	StateCheckingGitCommits,
				453	StateRunningAutoformatters,
				454	StateCheckingBudget,
				455	StateGatheringAdditionalMessages,
				456	StateSendingToolResults,
				457	StateProcessingLLMResponse,
				458	StateEndOfTurn,
				459	}
				460
				461	// Manually perform each transition
				462	for _, state := range expectedStates {
				463	err := agent.stateMachine.Transition(ctx, state, "Test transition to "+state.String())
				464	if err != nil {
				465	t.Errorf("Failed to transition to %s: %v", state, err)
				466	}
				467	}
				468
				469	// Check if we recorded the right number of transitions
				470	if len(transitions) != len(expectedStates) {
				471	t.Errorf("Expected %d state transitions, got %d", len(expectedStates), len(transitions))
				472	}
				473
				474	// Check each transition matched what we expected
				475	for i, expected := range expectedStates {
				476	if i < len(transitions) {
				477	if transitions[i] != expected {
				478	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				479	}
				480	}
				481	}
				482
				483	// Verify the current state is the last one we transitioned to
				484	if state := agent.CurrentState(); state != expectedStates[len(expectedStates)-1] {
				485	t.Errorf("Expected current state to be %s, got %s", expectedStates[len(expectedStates)-1], state)
				486	}
				487
				488	// Test force transition
				489	agent.stateMachine.ForceTransition(ctx, StateCancelled, "Testing force transition")
				490
				491	// Verify current state was updated
				492	if state := agent.CurrentState(); state != StateCancelled {
				493	t.Errorf("Expected forced state to be StateCancelled, got %s", state)
				494	}
				495	}
				496
				497	// mockConvoInterface is a mock implementation of ConvoInterface for testing
				498	type mockConvoInterface struct {
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	499	SendMessageFunc func(message llm.Message) (*llm.Response, error)
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	500	ToolResultContentsFunc func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error)
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	501	}
				502
				503	func (c *mockConvoInterface) GetID() string {
				504	return "mockConvoInterface-id"
				505	}
				506
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	507	func (c mockConvoInterface) SubConvoWithHistory() conversation.Convo {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	508	return nil
				509	}
				510
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	511	func (m *mockConvoInterface) CumulativeUsage() conversation.CumulativeUsage {
				512	return conversation.CumulativeUsage{}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	513	}
				514
Philip Zeyliger	b8a8f35	2025-06-02 07:39:37 -0700	[diff] [blame]	515	func (m *mockConvoInterface) LastUsage() llm.Usage {
				516	return llm.Usage{}
				517	}
				518
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	519	func (m *mockConvoInterface) ResetBudget(conversation.Budget) {}
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	520
				521	func (m *mockConvoInterface) OverBudget() error {
				522	return nil
				523	}
				524
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	525	func (m mockConvoInterface) SendMessage(message llm.Message) (llm.Response, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	526	if m.SendMessageFunc != nil {
				527	return m.SendMessageFunc(message)
				528	}
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	529	return &llm.Response{StopReason: llm.StopReasonEndTurn}, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	530	}
				531
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	532	func (m mockConvoInterface) SendUserTextMessage(s string, otherContents ...llm.Content) (llm.Response, error) {
				533	return m.SendMessage(llm.UserStringMessage(s))
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	534	}
				535
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	536	func (m mockConvoInterface) ToolResultContents(ctx context.Context, resp llm.Response) ([]llm.Content, bool, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	537	if m.ToolResultContentsFunc != nil {
				538	return m.ToolResultContentsFunc(ctx, resp)
				539	}
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	540	return []llm.Content{}, false, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	541	}
				542
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	543	func (m mockConvoInterface) ToolResultCancelContents(resp llm.Response) ([]llm.Content, error) {
				544	return []llm.Content{llm.StringContent("Tool use cancelled")}, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	545	}
				546
				547	func (m *mockConvoInterface) CancelToolUse(toolUseID string, cause error) error {
				548	return nil
				549	}
				550
Philip Zeyliger	43a0bfc	2025-07-14 14:54:27 -0700	[diff] [blame]	551	func (m *mockConvoInterface) DebugJSON() ([]byte, error) {
				552	return []byte(`[{"role": "user", "content": [{"type": "text", "text": "mock conversation"}]}]`), nil
				553	}
				554
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	555	func TestAgentProcessTurnStateTransitions(t *testing.T) {
				556	// Create a mock ConvoInterface for testing
				557	mockConvo := &mockConvoInterface{}
				558
				559	// Use the testing context
				560	ctx := t.Context()
				561
				562	// Create an agent with the state machine
				563	agent := &Agent{
Philip Zeyliger	f287299	2025-05-22 10:35:28 -0700	[diff] [blame]	564	convo: mockConvo,
				565	config: AgentConfig{Context: ctx},
				566	inbox: make(chan string, 10),
				567	ready: make(chan struct{}),
				568
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	569	outstandingLLMCalls: make(map[string]struct{}),
				570	outstandingToolCalls: make(map[string]string),
				571	stateMachine: NewStateMachine(),
				572	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	573	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	574	}
				575
				576	// Verify initial state
				577	if state := agent.CurrentState(); state != StateReady {
				578	t.Errorf("Expected initial state to be StateReady, got %s", state)
				579	}
				580
				581	// Add a message to the inbox so we don't block in GatherMessages
				582	agent.inbox <- "Test message"
				583
				584	// Setup the mock to simulate a model response with end of turn
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	585	mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
				586	return &llm.Response{
				587	StopReason: llm.StopReasonEndTurn,
				588	Content: []llm.Content{
				589	llm.StringContent("This is a test response"),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	590	},
				591	}, nil
				592	}
				593
				594	// Track state transitions
				595	var transitions []State
				596	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				597	transitions = append(transitions, to)
				598	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				599	})
				600
				601	// Process a turn, which should trigger state transitions
				602	agent.processTurn(ctx)
				603
				604	// The minimum expected states for a simple end-of-turn response
				605	minExpectedStates := []State{
				606	StateWaitingForUserInput,
				607	StateSendingToLLM,
				608	StateProcessingLLMResponse,
				609	StateEndOfTurn,
				610	}
				611
				612	// Verify we have at least the minimum expected states
				613	if len(transitions) < len(minExpectedStates) {
				614	t.Errorf("Expected at least %d state transitions, got %d", len(minExpectedStates), len(transitions))
				615	}
				616
				617	// Check that the transitions follow the expected sequence
				618	for i, expected := range minExpectedStates {
				619	if i < len(transitions) {
				620	if transitions[i] != expected {
				621	t.Errorf("Transition %d: expected %s, got %s", i, expected, transitions[i])
				622	}
				623	}
				624	}
				625
				626	// Verify the final state is EndOfTurn
				627	if state := agent.CurrentState(); state != StateEndOfTurn {
				628	t.Errorf("Expected final state to be StateEndOfTurn, got %s", state)
				629	}
				630	}
				631
				632	func TestAgentProcessTurnWithToolUse(t *testing.T) {
				633	// Create a mock ConvoInterface for testing
				634	mockConvo := &mockConvoInterface{}
				635
				636	// Setup a test context
				637	ctx := context.Background()
				638
				639	// Create an agent with the state machine
				640	agent := &Agent{
Philip Zeyliger	f287299	2025-05-22 10:35:28 -0700	[diff] [blame]	641	convo: mockConvo,
				642	config: AgentConfig{Context: ctx},
				643	inbox: make(chan string, 10),
				644	ready: make(chan struct{}),
				645
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	646	outstandingLLMCalls: make(map[string]struct{}),
				647	outstandingToolCalls: make(map[string]string),
				648	stateMachine: NewStateMachine(),
				649	startOfTurn: time.Now(),
Philip Zeyliger	9373c07	2025-05-01 10:27:01 -0700	[diff] [blame]	650	subscribers: []chan *AgentMessage{},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	651	}
				652
				653	// Add a message to the inbox so we don't block in GatherMessages
				654	agent.inbox <- "Test message"
				655
				656	// First response requests a tool
				657	firstResponseDone := false
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	658	mockConvo.SendMessageFunc = func(message llm.Message) (*llm.Response, error) {
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	659	if !firstResponseDone {
				660	firstResponseDone = true
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	661	return &llm.Response{
				662	StopReason: llm.StopReasonToolUse,
				663	Content: []llm.Content{
				664	llm.StringContent("I'll use a tool"),
				665	{Type: llm.ContentTypeToolUse, ToolName: "test_tool", ToolInput: []byte("{}"), ID: "test_id"},
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	666	},
				667	}, nil
				668	}
				669	// Second response ends the turn
Josh Bleecher Snyder	4f84ab7	2025-04-22 16:40:54 -0700	[diff] [blame]	670	return &llm.Response{
				671	StopReason: llm.StopReasonEndTurn,
				672	Content: []llm.Content{
				673	llm.StringContent("Finished using the tool"),
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	674	},
				675	}, nil
				676	}
				677
				678	// Tool result content handler
Josh Bleecher Snyder	64f2aa8	2025-05-14 18:31:05 +0000	[diff] [blame]	679	mockConvo.ToolResultContentsFunc = func(ctx context.Context, resp *llm.Response) ([]llm.Content, bool, error) {
				680	return []llm.Content{llm.StringContent("Tool executed successfully")}, false, nil
Sean McCullough	96b60dd	2025-04-30 09:49:10 -0700	[diff] [blame]	681	}
				682
				683	// Track state transitions
				684	var transitions []State
				685	agent.stateMachine.SetTransitionCallback(func(ctx context.Context, from, to State, event TransitionEvent) {
				686	transitions = append(transitions, to)
				687	t.Logf("State transition: %s -> %s (%s)", from, to, event.Description)
				688	})
				689
				690	// Process a turn with tool use
				691	agent.processTurn(ctx)
				692
				693	// Define expected states for a tool use flow
				694	expectedToolStates := []State{
				695	StateWaitingForUserInput,
				696	StateSendingToLLM,
				697	StateProcessingLLMResponse,
				698	StateToolUseRequested,
				699	StateCheckingForCancellation,
				700	StateRunningTool,
				701	}
				702
				703	// Verify that these states are present in order
				704	for i, expectedState := range expectedToolStates {
				705	if i >= len(transitions) {
				706	t.Errorf("Missing expected transition to %s; only got %d transitions", expectedState, len(transitions))
				707	continue
				708	}
				709	if transitions[i] != expectedState {
				710	t.Errorf("Expected transition %d to be %s, got %s", i, expectedState, transitions[i])
				711	}
				712	}
				713
				714	// Also verify we eventually reached EndOfTurn
				715	if !slices.Contains(transitions, StateEndOfTurn) {
				716	t.Errorf("Expected to eventually reach StateEndOfTurn, but never did")
				717	}
				718	}
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	719
Philip Zeyliger	72252cb	2025-05-10 17:00:08 -0700	[diff] [blame]	720	func TestPushToOutbox(t *testing.T) {
				721	// Create a new agent
				722	a := &Agent{
				723	outstandingLLMCalls: make(map[string]struct{}),
				724	outstandingToolCalls: make(map[string]string),
				725	stateMachine: NewStateMachine(),
				726	subscribers: make([]chan *AgentMessage, 0),
				727	}
				728
				729	// Create a channel to receive messages
				730	messageCh := make(chan *AgentMessage, 1)
				731
				732	// Add the channel to the subscribers list
				733	a.mu.Lock()
				734	a.subscribers = append(a.subscribers, messageCh)
				735	a.mu.Unlock()
				736
				737	// We need to set the text that would be produced by our modified contentToString function
				738	resultText := "test resultnested result" // Directly set the expected output
				739
				740	// In a real-world scenario, this would be coming from a toolResult that contained nested content
				741
				742	m := AgentMessage{
				743	Type: ToolUseMessageType,
				744	ToolResult: resultText,
				745	}
				746
				747	// Push the message to the outbox
				748	a.pushToOutbox(context.Background(), m)
				749
				750	// Receive the message from the subscriber
				751	received := <-messageCh
				752
				753	// Check that the Content field contains the concatenated text from ToolResult
				754	expected := "test resultnested result"
				755	if received.Content != expected {
				756	t.Errorf("Expected Content to be %q, got %q", expected, received.Content)
				757	}
				758	}
Josh Bleecher Snyder	3b44cc3	2025-07-22 02:28:14 +0000	[diff] [blame]	759
				760	// TestCleanSlugName tests the slug cleaning function
				761	func TestCleanSlugName(t *testing.T) {
				762	tests := []struct {
				763	name string
				764	input string
				765	want string
				766	}{
				767	{"simple lowercase", "fix-bug", "fix-bug"},
				768	{"uppercase to lowercase", "FIX-BUG", "fix-bug"},
				769	{"spaces to hyphens", "fix login bug", "fix-login-bug"},
				770	{"mixed case and spaces", "Fix Login Bug", "fix-login-bug"},
				771	{"special characters removed", "fix_bug@home!", "fixbughome"},
				772	{"multiple hyphens preserved", "fix--bug---here", "fix--bug---here"},
				773	{"leading/trailing hyphens preserved", "-fix-bug-", "-fix-bug-"},
				774	{"numbers preserved", "fix-bug-v2", "fix-bug-v2"},
				775	{"empty string", "", ""},
				776	{"only special chars", "@#$%", ""},
				777	}
				778
				779	for _, tt := range tests {
				780	t.Run(tt.name, func(t *testing.T) {
				781	got := cleanSlugName(tt.input)
				782	if got != tt.want {
				783	t.Errorf("cleanSlugName(%q) = %q, want %q", tt.input, got, tt.want)
				784	}
				785	})
				786	}
				787	}
				788
				789	// TestAutoGenerateSlugInputValidation tests input validation for auto slug generation
				790	func TestAutoGenerateSlugInputValidation(t *testing.T) {
				791	// Test soleText with empty input
				792	emptyContents := []llm.Content{}
				793	_, err := soleText(emptyContents)
				794	if err == nil {
				795	t.Errorf("Expected error for empty contents, got nil")
				796	}
				797
				798	// Test with non-text content only
				799	nonTextContents := []llm.Content{
				800	{Type: llm.ContentTypeToolUse, ToolName: "bash"},
				801	}
				802	_, err = soleText(nonTextContents)
				803	if err == nil {
				804	t.Errorf("Expected error for non-text contents, got nil")
				805	}
				806
				807	// Test slug formatting
				808	testInputs := []string{
				809	"Fix the login bug",
				810	"Add user authentication system",
				811	"Refactor API endpoints",
				812	"Update documentation",
				813	}
				814
				815	for _, input := range testInputs {
				816	slug := cleanSlugName(strings.ToLower(strings.ReplaceAll(input, " ", "-")))
				817	if slug == "" {
				818	t.Errorf("cleanSlugName produced empty result for input %q", input)
				819	}
				820	if !strings.Contains(slug, "-") {
				821	// We expect most multi-word inputs to contain hyphens after processing
				822	t.Logf("Input %q produced slug %q (no hyphen found, might be single word)", input, slug)
				823	}
				824	}
				825	}
				826
				827	// TestSoleText tests the soleText helper function
				828	func TestSoleText(t *testing.T) {
				829	tests := []struct {
				830	name string
				831	contents []llm.Content
				832	wantText string
				833	wantErr bool
				834	}{
				835	{
				836	name: "single text content",
				837	contents: []llm.Content{
				838	{Type: llm.ContentTypeText, Text: " Hello world "},
				839	},
				840	wantText: "Hello world",
				841	wantErr: false,
				842	},
				843	{
				844	name: "empty slice",
				845	contents: []llm.Content{},
				846	wantText: "",
				847	wantErr: true,
				848	},
				849	{
				850	name: "multiple contents",
				851	contents: []llm.Content{
				852	{Type: llm.ContentTypeText, Text: "First"},
				853	{Type: llm.ContentTypeText, Text: "Second"},
				854	},
				855	wantText: "",
				856	wantErr: true,
				857	},
				858	{
				859	name: "non-text content",
				860	contents: []llm.Content{
				861	{Type: llm.ContentTypeToolUse, ToolName: "bash"},
				862	},
				863	wantText: "",
				864	wantErr: true,
				865	},
				866	{
				867	name: "empty text content",
				868	contents: []llm.Content{
				869	{Type: llm.ContentTypeText, Text: ""},
				870	},
				871	wantText: "",
				872	wantErr: true,
				873	},
				874	}
				875
				876	for _, tt := range tests {
				877	t.Run(tt.name, func(t *testing.T) {
				878	gotText, err := soleText(tt.contents)
				879	if (err != nil) != tt.wantErr {
				880	t.Errorf("soleText() error = %v, wantErr %v", err, tt.wantErr)
				881	return
				882	}
				883	if gotText != tt.wantText {
				884	t.Errorf("soleText() gotText = %v, want %v", gotText, tt.wantText)
				885	}
				886	})
				887	}
				888	}